Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Analysis/Loads.cpp | 4
-rw-r--r-- llvm/lib/Analysis/MemoryLocation.cpp | 16
-rw-r--r-- llvm/lib/Analysis/StackLifetime.cpp | 42
-rw-r--r-- llvm/lib/Analysis/ValueTracking.cpp | 27
-rw-r--r-- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 6
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 3
-rw-r--r-- llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/CodeGenPrepare.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 44
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/Utils.cpp | 22
-rw-r--r-- llvm/lib/CodeGen/RegisterCoalescer.cpp | 189
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 126
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 5
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 3
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 28
-rw-r--r-- llvm/lib/CodeGen/TargetInstrInfo.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/TargetLoweringBase.cpp | 3
-rw-r--r-- llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 25
-rw-r--r-- llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 2
-rw-r--r-- llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp | 2
-rw-r--r-- llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp | 2
-rw-r--r-- llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 2
-rw-r--r-- llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 2
-rw-r--r-- llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp | 10
-rw-r--r-- llvm/lib/Frontend/Offloading/CMakeLists.txt | 1
-rw-r--r-- llvm/lib/Frontend/Offloading/PropertySet.cpp | 102
-rw-r--r-- llvm/lib/IR/AsmWriter.cpp | 2
-rw-r--r-- llvm/lib/IR/Instruction.cpp | 9
-rw-r--r-- llvm/lib/IR/Mangler.cpp | 3
-rw-r--r-- llvm/lib/IR/RuntimeLibcalls.cpp | 64
-rw-r--r-- llvm/lib/IR/Verifier.cpp | 9
-rw-r--r-- llvm/lib/MC/ELFObjectWriter.cpp | 32
-rw-r--r-- llvm/lib/MC/GOFFObjectWriter.cpp | 2
-rw-r--r-- llvm/lib/MC/MCAsmBackend.cpp | 5
-rw-r--r-- llvm/lib/MC/MCAsmStreamer.cpp | 17
-rw-r--r-- llvm/lib/MC/MCAssembler.cpp | 53
-rw-r--r-- llvm/lib/MC/MCCodeView.cpp | 11
-rw-r--r-- llvm/lib/MC/MCContext.cpp | 32
-rw-r--r-- llvm/lib/MC/MCELFStreamer.cpp | 18
-rw-r--r-- llvm/lib/MC/MCMachOStreamer.cpp | 14
-rw-r--r-- llvm/lib/MC/MCObjectFileInfo.cpp | 42
-rw-r--r-- llvm/lib/MC/MCObjectStreamer.cpp | 135
-rw-r--r-- llvm/lib/MC/MCParser/AsmParser.cpp | 5
-rw-r--r-- llvm/lib/MC/MCParser/COFFMasmParser.cpp | 3
-rw-r--r-- llvm/lib/MC/MCParser/ELFAsmParser.cpp | 4
-rw-r--r-- llvm/lib/MC/MCParser/WasmAsmParser.cpp | 8
-rw-r--r-- llvm/lib/MC/MCSection.cpp | 6
-rw-r--r-- llvm/lib/MC/MCWasmStreamer.cpp | 9
-rw-r--r-- llvm/lib/MC/MCWin64EH.cpp | 3
-rw-r--r-- llvm/lib/MC/MCWinCOFFStreamer.cpp | 30
-rw-r--r-- llvm/lib/MC/MCXCOFFStreamer.cpp | 11
-rw-r--r-- llvm/lib/MC/MachObjectWriter.cpp | 25
-rw-r--r-- llvm/lib/MC/WasmObjectWriter.cpp | 33
-rw-r--r-- llvm/lib/MC/WinCOFFObjectWriter.cpp | 13
-rw-r--r-- llvm/lib/MC/XCOFFObjectWriter.cpp | 9
-rw-r--r-- llvm/lib/Object/Binary.cpp | 2
-rw-r--r-- llvm/lib/Object/DXContainer.cpp | 181
-rw-r--r-- llvm/lib/Object/Object.cpp | 2
-rw-r--r-- llvm/lib/Object/ObjectFile.cpp | 4
-rw-r--r-- llvm/lib/Object/SymbolicFile.cpp | 2
-rw-r--r-- llvm/lib/ProfileData/InstrProf.cpp | 3
-rw-r--r-- llvm/lib/SandboxIR/Value.cpp | 2
-rw-r--r-- llvm/lib/Support/BalancedPartitioning.cpp | 2
-rw-r--r-- llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp | 18
-rw-r--r-- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 4
-rw-r--r-- llvm/lib/Target/AArch64/AArch64InstrFormats.td | 21
-rw-r--r-- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 142
-rw-r--r-- llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 23
-rw-r--r-- llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp | 7
-rw-r--r-- llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 9
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPU.td | 10
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 19
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 57
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 3
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 20
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 10
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 92
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 14
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 41
-rw-r--r-- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 84
-rw-r--r-- llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 7
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 3
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 14
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 69
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h | 13
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 5
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 10
-rw-r--r-- llvm/lib/Target/AMDGPU/SIDefines.h | 11
-rw-r--r-- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 9
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 59
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstructions.td | 17
-rw-r--r-- llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 1
-rw-r--r-- llvm/lib/Target/AMDGPU/VOP1Instructions.td | 12
-rw-r--r-- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 95
-rw-r--r-- llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 246
-rw-r--r-- llvm/lib/Target/AMDGPU/VOPInstructions.td | 28
-rw-r--r-- llvm/lib/Target/ARM/ARMISelLowering.cpp | 8
-rw-r--r-- llvm/lib/Target/ARM/ARMTargetMachine.cpp | 11
-rw-r--r-- llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 18
-rw-r--r-- llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h | 3
-rw-r--r-- llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 4
-rw-r--r-- llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 10
-rw-r--r-- llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 2
-rw-r--r-- llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp | 11
-rw-r--r-- llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h | 3
-rw-r--r-- llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp | 29
-rw-r--r-- llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp | 19
-rw-r--r-- llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h | 3
-rw-r--r-- llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp | 4
-rw-r--r-- llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.cpp | 3
-rw-r--r-- llvm/lib/Target/DirectX/DXContainerGlobals.cpp | 2
-rw-r--r-- llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp | 3
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp | 2
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp | 2
-rw-r--r-- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp | 15
-rw-r--r-- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp | 4
-rw-r--r-- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp | 4
-rw-r--r-- llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp | 14
-rw-r--r-- llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp | 49
-rw-r--r-- llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h | 3
-rw-r--r-- llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp | 4
-rw-r--r-- llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp | 11
-rw-r--r-- llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp | 4
-rw-r--r-- llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp | 15
-rw-r--r-- llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 7
-rw-r--r-- llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 10
-rw-r--r-- llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 3
-rw-r--r-- llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 9
-rw-r--r-- llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp | 2
-rw-r--r-- llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp | 12
-rw-r--r-- llvm/lib/Target/Mips/MipsAsmPrinter.cpp | 3
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 26
-rw-r--r-- llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 2
-rw-r--r-- llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 20
-rw-r--r-- llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 7
-rw-r--r-- llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp | 2
-rw-r--r-- llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 10
-rw-r--r-- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 20
-rw-r--r-- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 9
-rw-r--r-- llvm/lib/Target/PowerPC/PPCISelLowering.h | 2
-rw-r--r-- llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 51
-rw-r--r-- llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h | 3
-rw-r--r-- llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp | 4
-rw-r--r-- llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp | 5
-rw-r--r-- llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h | 21
-rw-r--r-- llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp | 19
-rw-r--r-- llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp | 5
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 144
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelLowering.h | 5
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfo.td | 7
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td | 55
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td | 12
-rw-r--r-- llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp | 93
-rw-r--r-- llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h | 6
-rw-r--r-- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 6
-rw-r--r-- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 10
-rw-r--r-- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h | 4
-rw-r--r-- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 25
-rw-r--r-- llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp | 3
-rw-r--r-- llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp | 2
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 65
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp | 4
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 5
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp | 4
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 13
-rw-r--r-- llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp | 11
-rw-r--r-- llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp | 4
-rw-r--r-- llvm/lib/Target/SystemZ/MCTargetDesc/SystemZELFObjectWriter.cpp | 4
-rw-r--r-- llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp | 13
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 4
-rw-r--r-- llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp | 11
-rw-r--r-- llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp | 4
-rw-r--r-- llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp | 29
-rw-r--r-- llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp | 6
-rw-r--r-- llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp | 2
-rw-r--r-- llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp | 11
-rw-r--r-- llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp | 6
-rw-r--r-- llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp | 2
-rw-r--r-- llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp | 2
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp | 21
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp | 14
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp | 8
-rw-r--r-- llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 15
-rw-r--r-- llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp | 4
-rw-r--r-- llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp | 2
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 72
-rw-r--r-- llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 47
-rw-r--r-- llvm/lib/Target/Xtensa/MCTargetDesc/XtensaAsmBackend.cpp | 11
-rw-r--r-- llvm/lib/TargetParser/ARMTargetParser.cpp | 10
-rw-r--r-- llvm/lib/TargetParser/TargetParser.cpp | 18
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 31
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 15
-rw-r--r-- llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 4
-rw-r--r-- llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 5
-rw-r--r-- llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp | 3
-rw-r--r-- llvm/lib/Transforms/Scalar/GVN.cpp | 13
-rw-r--r-- llvm/lib/Transforms/Scalar/GVNSink.cpp | 2
-rw-r--r-- llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp | 2
-rw-r--r-- llvm/lib/Transforms/Scalar/SROA.cpp | 15
-rw-r--r-- llvm/lib/Transforms/Utils/CMakeLists.txt | 1
-rw-r--r-- llvm/lib/Transforms/Utils/Local.cpp | 34
-rw-r--r-- llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp | 5
-rw-r--r-- llvm/lib/Transforms/Utils/PredicateInfo.cpp | 2
-rw-r--r-- llvm/lib/Transforms/Utils/ProfileVerify.cpp | 30
-rw-r--r-- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 16
-rw-r--r-- llvm/lib/Transforms/Utils/SplitModuleByCategory.cpp | 323
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h | 14
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 115
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 22
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlan.cpp | 28
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlan.h | 44
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp | 1
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanCFG.h | 7
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp | 16
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 114
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 118
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanTransforms.h | 5
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 33
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 7
225 files changed, 3220 insertions, 1779 deletions
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 6fc81d787..da76f5b 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -833,6 +833,10 @@ bool llvm::canReplacePointersInUseIfEqual(const Use &U, const Value *To,
if (!To->getType()->isPointerTy())
return true;
+ // Do not perform replacements in lifetime intrinsic arguments.
+ if (isa<LifetimeIntrinsic>(U.getUser()))
+ return false;
+
if (isPointerAlwaysReplaceable(&*U, To, DL))
return true;
return isPointerUseReplacable(U);
diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp
index c8daab7a..28a2640 100644
--- a/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/llvm/lib/Analysis/MemoryLocation.cpp
@@ -190,7 +190,21 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
return MemoryLocation::getAfter(Arg, AATags);
case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
+ case Intrinsic::lifetime_end: {
+ assert(ArgIdx == 1 && "Invalid argument index");
+ auto *AI = dyn_cast<AllocaInst>(Arg);
+ if (!AI)
+ // lifetime of poison value.
+ return MemoryLocation::getBeforeOrAfter(Arg);
+
+ std::optional<TypeSize> AllocSize =
+ AI->getAllocationSize(II->getDataLayout());
+ return MemoryLocation(Arg,
+ AllocSize ? LocationSize::precise(*AllocSize)
+ : LocationSize::afterPointer(),
+ AATags);
+ }
+
case Intrinsic::invariant_start:
assert(ArgIdx == 1 && "Invalid argument index");
return MemoryLocation(
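
The MemoryLocation hunk above derives the size of a lifetime marker's location from the alloca operand itself instead of an explicit size argument. A minimal standalone sketch of that size query, assuming a recent LLVM where AllocaInst::getAllocationSize(const DataLayout &) returns std::optional<TypeSize>; the function and names in main() are illustrative only, not part of the patch:

  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"
  #include <cstdio>
  #include <optional>

  using namespace llvm;

  int main() {
    LLVMContext Ctx;
    Module M("demo", Ctx);
    IRBuilder<> B(Ctx);

    Function *F = Function::Create(FunctionType::get(B.getVoidTy(), false),
                                   GlobalValue::ExternalLinkage, "f", M);
    BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
    B.SetInsertPoint(BB);

    // A static alloca of [16 x i32]; its allocation size is known up front.
    AllocaInst *AI = B.CreateAlloca(ArrayType::get(B.getInt32Ty(), 16));
    B.CreateRetVoid();

    std::optional<TypeSize> Size = AI->getAllocationSize(M.getDataLayout());
    if (Size) // maps to LocationSize::precise(*Size) in the hunk above
      std::printf("alloca covers %llu bytes\n",
                  (unsigned long long)Size->getFixedValue());
    else      // unknown (e.g. dynamic) size maps to LocationSize::afterPointer()
      std::printf("allocation size is not known statically\n");
    return 0;
  }
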
diff --git a/llvm/lib/Analysis/StackLifetime.cpp b/llvm/lib/Analysis/StackLifetime.cpp
index 34a7a04..abe4985 100644
--- a/llvm/lib/Analysis/StackLifetime.cpp
+++ b/llvm/lib/Analysis/StackLifetime.cpp
@@ -59,44 +59,20 @@ bool StackLifetime::isAliveAfter(const AllocaInst *AI,
return getLiveRange(AI).test(InstNum);
}
-// Returns unique alloca annotated by lifetime marker only if
-// markers has the same size and points to the alloca start.
-static const AllocaInst *findMatchingAlloca(const IntrinsicInst &II,
- const DataLayout &DL) {
- const AllocaInst *AI = cast<AllocaInst>(II.getArgOperand(1));
- auto AllocaSize = AI->getAllocationSize(DL);
- if (!AllocaSize)
- return nullptr;
-
- auto *Size = dyn_cast<ConstantInt>(II.getArgOperand(0));
- if (!Size)
- return nullptr;
- int64_t LifetimeSize = Size->getSExtValue();
-
- if (LifetimeSize != -1 && uint64_t(LifetimeSize) != *AllocaSize)
- return nullptr;
-
- return AI;
-}
-
void StackLifetime::collectMarkers() {
InterestingAllocas.resize(NumAllocas);
DenseMap<const BasicBlock *, SmallDenseMap<const IntrinsicInst *, Marker>>
BBMarkerSet;
- const DataLayout &DL = F.getDataLayout();
-
// Compute the set of start/end markers per basic block.
for (const BasicBlock *BB : depth_first(&F)) {
for (const Instruction &I : *BB) {
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
if (!II || !II->isLifetimeStartOrEnd())
continue;
- const AllocaInst *AI = findMatchingAlloca(*II, DL);
- if (!AI) {
- HasUnknownLifetimeStartOrEnd = true;
+ const AllocaInst *AI = dyn_cast<AllocaInst>(II->getArgOperand(1));
+ if (!AI)
continue;
- }
auto It = AllocaNumbering.find(AI);
if (It == AllocaNumbering.end())
continue;
@@ -325,20 +301,6 @@ StackLifetime::StackLifetime(const Function &F,
}
void StackLifetime::run() {
- if (HasUnknownLifetimeStartOrEnd) {
- // There is marker which we can't assign to a specific alloca, so we
- // fallback to the most conservative results for the type.
- switch (Type) {
- case LivenessType::May:
- LiveRanges.resize(NumAllocas, getFullLiveRange());
- break;
- case LivenessType::Must:
- LiveRanges.resize(NumAllocas, LiveRange(Instructions.size()));
- break;
- }
- return;
- }
-
LiveRanges.resize(NumAllocas, LiveRange(Instructions.size()));
for (unsigned I = 0; I < NumAllocas; ++I)
if (!InterestingAllocas.test(I))
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index af85ce4..1e70228 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1351,6 +1351,8 @@ static void computeKnownBitsFromOperator(const Operator *I,
isa<ScalableVectorType>(I->getType()))
break;
+ unsigned NumElts = DemandedElts.getBitWidth();
+ bool IsLE = Q.DL.isLittleEndian();
// Look through a cast from narrow vector elements to wider type.
// Examples: v4i32 -> v2i64, v3i8 -> v24
unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
@@ -1369,7 +1371,6 @@ static void computeKnownBitsFromOperator(const Operator *I,
//
// The known bits of each sub-element are then inserted into place
// (dependent on endian) to form the full result of known bits.
- unsigned NumElts = DemandedElts.getBitWidth();
unsigned SubScale = BitWidth / SubBitWidth;
APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
for (unsigned i = 0; i != NumElts; ++i) {
@@ -1381,10 +1382,32 @@ static void computeKnownBitsFromOperator(const Operator *I,
for (unsigned i = 0; i != SubScale; ++i) {
computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc, Q,
Depth + 1);
- unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
+ unsigned ShiftElt = IsLE ? i : SubScale - 1 - i;
Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
}
}
+ // Look through a cast from wider vector elements to narrow type.
+ // Examples: v2i64 -> v4i32
+ if (SubBitWidth % BitWidth == 0) {
+ unsigned SubScale = SubBitWidth / BitWidth;
+ KnownBits KnownSrc(SubBitWidth);
+ APInt SubDemandedElts =
+ APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale);
+ computeKnownBits(I->getOperand(0), SubDemandedElts, KnownSrc, Q,
+ Depth + 1);
+
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (DemandedElts[i]) {
+ unsigned Shifts = IsLE ? i : NumElts - 1 - i;
+ unsigned Offset = (Shifts % SubScale) * BitWidth;
+ Known = Known.intersectWith(KnownSrc.extractBits(BitWidth, Offset));
+ if (Known.isUnknown())
+ break;
+ }
+ }
+ }
break;
}
case Instruction::SExt: {
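
The wide-to-narrow bitcast case added above maps each demanded narrow element onto a bit slice of its enclosing wide source element, mirroring the element index on big-endian targets. A plain C++ sketch of just that index arithmetic (no LLVM dependency; the constants model a v2i64 -> v4i32 bitcast):

  #include <cstdio>

  int main() {
    const unsigned BitWidth = 32;    // narrow result element width (i32)
    const unsigned SubBitWidth = 64; // wide source element width (i64)
    const unsigned NumElts = 4;      // narrow elements in the result
    const unsigned SubScale = SubBitWidth / BitWidth; // narrow elts per wide elt

    for (int IsLE = 1; IsLE >= 0; --IsLE) {
      std::printf("%s endian:\n", IsLE ? "little" : "big");
      for (unsigned i = 0; i != NumElts; ++i) {
        // Same formulas as the hunk above: mirror the index for big-endian,
        // then take the bit offset within the enclosing wide element.
        unsigned Shifts = IsLE ? i : NumElts - 1 - i;
        unsigned Offset = (Shifts % SubScale) * BitWidth;
        std::printf("  narrow elt %u <- wide elt %u, bits [%u..%u)\n", i,
                    i / SubScale, Offset, Offset + BitWidth);
      }
    }
    return 0;
  }
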
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 7e0d81f..05680fa 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -610,7 +610,7 @@ public:
std::vector<StringRef> ModulePaths;
for (auto &[ModPath, _] : Index.modulePaths())
ModulePaths.push_back(ModPath);
- llvm::sort(ModulePaths.begin(), ModulePaths.end());
+ llvm::sort(ModulePaths);
for (auto &ModPath : ModulePaths)
Callback(*Index.modulePaths().find(ModPath));
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 1641c3e..c72b6e8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -3194,7 +3194,7 @@ void AsmPrinter::emitJumpTableSizesSection(const MachineJumpTableInfo &MJTI,
return;
if (isElf) {
- MCSymbolELF *LinkedToSym = dyn_cast<MCSymbolELF>(CurrentFnSym);
+ auto *LinkedToSym = static_cast<MCSymbolELF *>(CurrentFnSym);
int Flags = F.hasComdat() ? static_cast<int>(ELF::SHF_GROUP) : 0;
JumpTableSizesSection = OutContext.getELFSection(
@@ -4702,7 +4702,7 @@ void AsmPrinter::emitXRayTable() {
const Triple &TT = TM.getTargetTriple();
// Use PC-relative addresses on all targets.
if (TT.isOSBinFormatELF()) {
- auto LinkedToSym = cast<MCSymbolELF>(CurrentFnSym);
+ auto LinkedToSym = static_cast<const MCSymbolELF *>(CurrentFnSym);
auto Flags = ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER;
StringRef GroupName;
if (F.hasComdat()) {
@@ -4825,7 +4825,7 @@ void AsmPrinter::emitPatchableFunctionEntries() {
Flags |= ELF::SHF_GROUP;
GroupName = F.getComdat()->getName();
}
- LinkedToSym = cast<MCSymbolELF>(CurrentFnSym);
+ LinkedToSym = static_cast<const MCSymbolELF *>(CurrentFnSym);
}
OutStreamer->switchSection(OutContext.getELFSection(
SectionName, ELF::SHT_PROGBITS, Flags, 0, GroupName, F.hasComdat(),
diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 08ed78e..a7491a2 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -230,7 +230,7 @@ void DIEHash::hashBlockData(const DIE::const_value_range &Values) {
"Base types referenced from DW_OP_convert should have a name");
hashNestedType(C, Name);
} else
- Hash.update((uint64_t)V.getDIEInteger().getValue());
+ Hash.update(V.getDIEInteger().getValue());
}
// Hash the contents of a loclistptr class.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 5577a7d..f9d7e76 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -508,7 +508,8 @@ void DwarfCompileUnit::addWasmRelocBaseGlobal(DIELoc *Loc, StringRef GlobalName,
// don't want to depend on target specific headers in this code?
const unsigned TI_GLOBAL_RELOC = 3;
unsigned PointerSize = Asm->getDataLayout().getPointerSize();
- auto *Sym = cast<MCSymbolWasm>(Asm->GetExternalSymbolSymbol(GlobalName));
+ auto *Sym =
+ static_cast<MCSymbolWasm *>(Asm->GetExternalSymbolSymbol(GlobalName));
// FIXME: this repeats what WebAssemblyMCInstLower::
// GetExternalSymbolSymbol does, since if there's no code that
// refers to this symbol, we have to set it here.
diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
index ff265b5..260ce8f 100644
--- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
+++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
@@ -1403,7 +1403,7 @@ void AssignmentTrackingLowering::addMemDef(BlockInfo *LiveSet, VariableID Var,
const Assignment &AV) {
LiveSet->setAssignment(BlockInfo::Stack, Var, AV);
- // Use this assigment for all fragments contained within Var, but do not
+ // Use this assignment for all fragments contained within Var, but do not
// provide a Source because we cannot convert Var's value to a value for the
// fragment.
Assignment FragAV = AV;
@@ -1416,7 +1416,7 @@ void AssignmentTrackingLowering::addDbgDef(BlockInfo *LiveSet, VariableID Var,
const Assignment &AV) {
LiveSet->setAssignment(BlockInfo::Debug, Var, AV);
- // Use this assigment for all fragments contained within Var, but do not
+ // Use this assignment for all fragments contained within Var, but do not
// provide a Source because we cannot convert Var's value to a value for the
// fragment.
Assignment FragAV = AV;
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index f16283b..9223739 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1834,7 +1834,7 @@ bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
///
/// Return true if any changes are made.
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
- if (TLI.hasMultipleConditionRegisters())
+ if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
return false;
// Avoid sinking soft-FP comparisons, since this can move them into a loop.
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 0f2c580..59c62cf 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -95,7 +95,7 @@ void CSEMIRBuilder::profileSrcOp(const SrcOp &Op,
GISelInstProfileBuilder &B) const {
switch (Op.getSrcOpKind()) {
case SrcOp::SrcType::Ty_Imm:
- B.addNodeIDImmediate(static_cast<int64_t>(Op.getImm()));
+ B.addNodeIDImmediate(Op.getImm());
break;
case SrcOp::SrcType::Ty_Predicate:
B.addNodeIDImmediate(static_cast<int64_t>(Op.getPredicate()));
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index fd38c30..bbfae57 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1592,9 +1592,19 @@ bool IRTranslator::translateGetElementPtr(const User &U,
Type *OffsetIRTy = DL->getIndexType(PtrIRTy);
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
- uint32_t Flags = 0;
+ uint32_t PtrAddFlags = 0;
+ // Each PtrAdd generated to implement the GEP inherits its nuw, nusw, inbounds
+ // flags.
if (const Instruction *I = dyn_cast<Instruction>(&U))
- Flags = MachineInstr::copyFlagsFromInstruction(*I);
+ PtrAddFlags = MachineInstr::copyFlagsFromInstruction(*I);
+
+ auto PtrAddFlagsWithConst = [&](int64_t Offset) {
+ // For nusw/inbounds GEP with an offset that is nonnegative when interpreted
+ // as signed, assume there is no unsigned overflow.
+ if (Offset >= 0 && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap))
+ return PtrAddFlags | MachineInstr::MIFlag::NoUWrap;
+ return PtrAddFlags;
+ };
// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
@@ -1644,7 +1654,9 @@ bool IRTranslator::translateGetElementPtr(const User &U,
if (Offset != 0) {
auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset);
- BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, OffsetMIB.getReg(0))
+ BaseReg = MIRBuilder
+ .buildPtrAdd(PtrTy, BaseReg, OffsetMIB.getReg(0),
+ PtrAddFlagsWithConst(Offset))
.getReg(0);
Offset = 0;
}
@@ -1668,12 +1680,23 @@ bool IRTranslator::translateGetElementPtr(const User &U,
if (ElementSize != 1) {
auto ElementSizeMIB = MIRBuilder.buildConstant(
getLLTForType(*OffsetIRTy, *DL), ElementSize);
+
+ // The multiplication is NUW if the GEP is NUW and NSW if the GEP is
+ // NUSW.
+ uint32_t ScaleFlags = PtrAddFlags & MachineInstr::MIFlag::NoUWrap;
+ if (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap)
+ ScaleFlags |= MachineInstr::MIFlag::NoSWrap;
+
GepOffsetReg =
- MIRBuilder.buildMul(OffsetTy, IdxReg, ElementSizeMIB).getReg(0);
- } else
+ MIRBuilder.buildMul(OffsetTy, IdxReg, ElementSizeMIB, ScaleFlags)
+ .getReg(0);
+ } else {
GepOffsetReg = IdxReg;
+ }
- BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, GepOffsetReg).getReg(0);
+ BaseReg =
+ MIRBuilder.buildPtrAdd(PtrTy, BaseReg, GepOffsetReg, PtrAddFlags)
+ .getReg(0);
}
}
@@ -1681,11 +1704,8 @@ bool IRTranslator::translateGetElementPtr(const User &U,
auto OffsetMIB =
MIRBuilder.buildConstant(OffsetTy, Offset);
- if (Offset >= 0 && cast<GEPOperator>(U).isInBounds())
- Flags |= MachineInstr::MIFlag::NoUWrap;
-
MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0),
- Flags);
+ PtrAddFlagsWithConst(Offset));
return true;
}
@@ -2189,8 +2209,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
: TargetOpcode::LIFETIME_END;
- const AllocaInst *AI = cast<AllocaInst>(CI.getArgOperand(1));
- if (!AI->isStaticAlloca())
+ const AllocaInst *AI = dyn_cast<AllocaInst>(CI.getArgOperand(1));
+ if (!AI || !AI->isStaticAlloca())
return true;
MIRBuilder.buildInstr(Op).addFrameIndex(getOrCreateFrameIndex(*AI));
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index f48bfc0..8955dd0 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1401,6 +1401,21 @@ bool llvm::isBuildVectorConstantSplat(const Register Reg,
return false;
}
+bool llvm::isBuildVectorConstantSplat(const Register Reg,
+ const MachineRegisterInfo &MRI,
+ APInt SplatValue, bool AllowUndef) {
+ if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, AllowUndef)) {
+ if (SplatValAndReg->Value.getBitWidth() < SplatValue.getBitWidth())
+ return APInt::isSameValue(
+ SplatValAndReg->Value.sext(SplatValue.getBitWidth()), SplatValue);
+ return APInt::isSameValue(
+ SplatValAndReg->Value,
+ SplatValue.sext(SplatValAndReg->Value.getBitWidth()));
+ }
+
+ return false;
+}
+
bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI,
const MachineRegisterInfo &MRI,
int64_t SplatValue, bool AllowUndef) {
@@ -1408,6 +1423,13 @@ bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI,
AllowUndef);
}
+bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ APInt SplatValue, bool AllowUndef) {
+ return isBuildVectorConstantSplat(MI.getOperand(0).getReg(), MRI, SplatValue,
+ AllowUndef);
+}
+
std::optional<APInt>
llvm::getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI) {
if (auto SplatValAndReg =
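
The new APInt overload above compares splat constants of mismatched widths by sign-extending the narrower value first; plain APInt::isSameValue zero-extends, which would treat an all-ones i16 splat and a 32-bit -1 as different values. A small sketch of that comparison using llvm/ADT/APInt.h (the surrounding main() and variable names are illustrative only):

  #include "llvm/ADT/APInt.h"
  #include <cstdio>

  using namespace llvm;

  int main() {
    APInt SplatVal(16, 0xFFFF);             // element seen in the build_vector
    APInt Expected = APInt::getAllOnes(32); // value the caller asks about (-1)

    bool Same;
    if (SplatVal.getBitWidth() < Expected.getBitWidth())
      Same = APInt::isSameValue(SplatVal.sext(Expected.getBitWidth()), Expected);
    else
      Same = APInt::isSameValue(SplatVal, Expected.sext(SplatVal.getBitWidth()));

    std::printf("splat matches: %d\n", Same); // 1: 0xFFFF sign-extends to -1
    return 0;
  }
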
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 7ede564..514f2f0 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -306,12 +306,7 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
/// number if it is not zero. If DstReg is a physical register and the
/// existing subregister number of the def / use being updated is not zero,
/// make sure to set it to the correct physical subregister.
- ///
- /// If \p SubregToRegSrcInst is not empty, we are coalescing a
- /// `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an
- /// implicit-def of DstReg on instructions that define SrcReg.
- void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx,
- ArrayRef<MachineInstr *> SubregToRegSrcInst = {});
+ void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
/// If the given machine operand reads only undefined lanes add an undef
/// flag.
@@ -1448,7 +1443,6 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// CopyMI may have implicit operands, save them so that we can transfer them
// over to the newly materialized instruction after CopyMI is removed.
- LaneBitmask NewMIImplicitOpsMask;
SmallVector<MachineOperand, 4> ImplicitOps;
ImplicitOps.reserve(CopyMI->getNumOperands() -
CopyMI->getDesc().getNumOperands());
@@ -1463,9 +1457,6 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
(MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
"unexpected implicit virtual register def");
ImplicitOps.push_back(MO);
- if (MO.isDef() && MO.getReg().isVirtual() &&
- MRI->shouldTrackSubRegLiveness(DstReg))
- NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
}
}
@@ -1508,11 +1499,14 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
} else {
assert(MO.getReg() == NewMI.getOperand(0).getReg());
- // If lanemasks need to be tracked, compile the lanemask of the NewMI
- // implicit def operands to avoid subranges for the super-regs from
- // being removed by code later on in this function.
- if (MRI->shouldTrackSubRegLiveness(MO.getReg()))
- NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
+ // We're only expecting another def of the main output, so the range
+ // should get updated with the regular output range.
+ //
+ // FIXME: The range updating below probably needs updating to look at
+ // the super register if subranges are tracked.
+ assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
+ "subrange update for implicit-def of super register may not be "
+ "properly handled");
}
}
}
@@ -1612,8 +1606,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
for (LiveInterval::SubRange &SR : DstInt.subranges()) {
- if ((SR.LaneMask & DstMask).none() &&
- (SR.LaneMask & NewMIImplicitOpsMask).none()) {
+ if ((SR.LaneMask & DstMask).none()) {
LLVM_DEBUG(dbgs()
<< "Removing undefined SubRange "
<< PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
@@ -1631,11 +1624,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
UpdatedSubRanges = true;
} else {
// We know that this lane is defined by this instruction,
- // but at this point it may be empty because it is not used by
- // anything. This happens when updateRegDefUses adds the missing
- // lanes. Assign that lane a dead def so that the interferences
- // are properly modeled.
- if (SR.empty())
+ // but at this point it might not be live because it was not defined
+ // by the original instruction. This happens when the
+ // rematerialization widens the defined register. Assign that lane a
+ // dead def so that the interferences are properly modeled.
+ if (!SR.liveAt(DefIndex))
SR.createDeadDef(DefIndex, Alloc);
}
}
@@ -1877,14 +1870,11 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
}
}
-void RegisterCoalescer::updateRegDefsUses(
- Register SrcReg, Register DstReg, unsigned SubIdx,
- ArrayRef<MachineInstr *> SubregToRegSrcInsts) {
+void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
+ unsigned SubIdx) {
bool DstIsPhys = DstReg.isPhysical();
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
- // Coalescing a COPY may expose reads of 'undef' subregisters.
- // If so, then explicitly propagate 'undef' to those operands.
if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
if (MO.isUndef())
@@ -1901,15 +1891,6 @@ void RegisterCoalescer::updateRegDefsUses(
}
}
- // If DstInt already has a subrange for the unused lanes, then we shouldn't
- // create duplicate subranges when we update the interval for unused lanes.
- LaneBitmask DstIntLaneMask;
- if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
- for (LiveInterval::SubRange &SR : DstInt->subranges())
- DstIntLaneMask |= SR.LaneMask;
- }
-
- // Go through all instructions to replace uses of 'SrcReg' by 'DstReg'.
SmallPtrSet<MachineInstr *, 8> Visited;
for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg),
E = MRI->reg_instr_end();
@@ -1933,80 +1914,6 @@ void RegisterCoalescer::updateRegDefsUses(
if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
- bool RequiresImplicitRedef = false;
- if (!SubregToRegSrcInsts.empty()) {
- // We can only add an implicit-def and undef if the sub registers match,
- // e.g.
- // %0:gr32 = INSTX
- // %0.sub8:gr32 = INSTY // top 24 bits of %0 still defined
- // %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub32
- //
- // This cannot be transformed into:
- // %1.sub32:gr64 = INSTX
- // undef %1.sub8:gr64 = INSTY , implicit-def %1
- //
- // Because that would thrash the top 24 bits of %1.sub32.
- if (is_contained(SubregToRegSrcInsts, UseMI) &&
- all_of(UseMI->defs(),
- [&SubIdx, &SrcReg](const MachineOperand &MO) -> bool {
- if (MO.getReg() != SrcReg || !MO.getSubReg() || MO.isUndef())
- return true;
- return SubIdx == MO.getSubReg();
- })) {
- // Add implicit-def of super-register to express that the whole
- // register is defined by the instruction.
- MachineInstrBuilder MIB(*MF, UseMI);
- MIB.addReg(DstReg, RegState::ImplicitDefine);
- RequiresImplicitRedef = true;
- }
-
- // If the coalesed instruction doesn't fully define the register, we need
- // to preserve the original super register liveness for SUBREG_TO_REG.
- //
- // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
- // but it introduces liveness for other subregisters. Downstream users may
- // have been relying on those bits, so we need to ensure their liveness is
- // captured with a def of other lanes.
- if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
- // First check if there is sufficient granularity in terms of subranges.
- LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
- LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
- LaneBitmask UnusedLanes = DstMask & ~UsedLanes;
- if ((UnusedLanes & ~DstIntLaneMask).any()) {
- BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
- DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt);
- DstIntLaneMask |= UnusedLanes;
- }
-
- // After duplicating the live ranges for the low/hi bits, we
- // need to update the subranges of the DstReg interval such that
- // for a case like this:
- //
- // entry:
- // 16B %1:gpr32 = INSTRUCTION (<=> UseMI)
- // :
- // if.then:
- // 32B %1:gpr32 = MOVIMM32 ..
- // 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32
- //
- // Only the MOVIMM32 require a def of the top lanes and any intervals
- // for the top 32-bits of the def at 16B should be removed.
- for (LiveInterval::SubRange &SR : DstInt->subranges()) {
- if (!Writes || RequiresImplicitRedef ||
- (SR.LaneMask & UnusedLanes).none())
- continue;
-
- assert((SR.LaneMask & UnusedLanes) == SR.LaneMask &&
- "Unexpected lanemask. Subrange needs finer granularity");
-
- SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(false);
- auto SegmentI = SR.find(UseIdx);
- if (SegmentI != SR.end())
- SR.removeSegment(SegmentI, true);
- }
- }
- }
-
// Replace SrcReg with DstReg in all UseMI operands.
for (unsigned Op : Ops) {
MachineOperand &MO = UseMI->getOperand(Op);
@@ -2015,7 +1922,7 @@ void RegisterCoalescer::updateRegDefsUses(
// turn a full def into a read-modify-write sub-register def and vice
// versa.
if (SubIdx && MO.isDef())
- MO.setIsUndef(!Reads || RequiresImplicitRedef);
+ MO.setIsUndef(!Reads);
// A subreg use of a partially undef (super) register may be a complete
// undef use now and then has to be marked that way.
@@ -2118,30 +2025,6 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
LIS->shrinkToUses(&LI);
}
-/// For a given use of value \p Idx, it returns the def in the current block,
-/// or otherwise all possible defs in preceding blocks.
-static bool FindDefInBlock(SmallPtrSetImpl<MachineBasicBlock *> &VisitedBlocks,
- SmallVector<MachineInstr *> &Instrs,
- LiveIntervals *LIS, LiveInterval &SrcInt,
- MachineBasicBlock *MBB, VNInfo *Idx) {
- if (!Idx->isPHIDef()) {
- MachineInstr *Def = LIS->getInstructionFromIndex(Idx->def);
- assert(Def && "Unable to find a def for SUBREG_TO_REG source operand");
- Instrs.push_back(Def);
- return true;
- }
-
- bool Any = false;
- if (VisitedBlocks.count(MBB))
- return false;
- VisitedBlocks.insert(MBB);
- for (MachineBasicBlock *Pred : MBB->predecessors()) {
- Any |= FindDefInBlock(VisitedBlocks, Instrs, LIS, SrcInt, Pred,
- SrcInt.getVNInfoBefore(LIS->getMBBEndIdx(Pred)));
- }
- return Any;
-}
-
bool RegisterCoalescer::joinCopy(
MachineInstr *CopyMI, bool &Again,
SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) {
@@ -2273,35 +2156,6 @@ bool RegisterCoalescer::joinCopy(
});
}
- SmallVector<MachineInstr *> SubregToRegSrcInsts;
- if (CopyMI->isSubregToReg()) {
- // For the case where the copy instruction is a SUBREG_TO_REG, e.g.
- //
- // %0:gpr32 = movimm32 ..
- // %1:gpr64 = SUBREG_TO_REG 0, %0, sub32
- // ...
- // %0:gpr32 = COPY <something>
- //
- // After joining liveranges, the original `movimm32` will need an
- // implicit-def to make it explicit that the entire register is written,
- // i.e.
- //
- // undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0
- // ...
- // undef %0.sub32:gpr64 = COPY <something> // Note that this does not
- // // require an implicit-def,
- // // because it has nothing to
- // // do with the SUBREG_TO_REG.
- LiveInterval &SrcInt =
- LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
- SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex(*CopyMI);
- SmallPtrSet<MachineBasicBlock *, 8> VisitedBlocks;
- if (!FindDefInBlock(VisitedBlocks, SubregToRegSrcInsts, LIS, SrcInt,
- CopyMI->getParent(),
- SrcInt.Query(SubregToRegSlotIdx).valueIn()))
- llvm_unreachable("SUBREG_TO_REG src requires a def");
- }
-
ShrinkMask = LaneBitmask::getNone();
ShrinkMainRange = false;
@@ -2371,12 +2225,9 @@ bool RegisterCoalescer::joinCopy(
// Rewrite all SrcReg operands to DstReg.
// Also update DstReg operands to include DstIdx if it is set.
- if (CP.getDstIdx()) {
- assert(SubregToRegSrcInsts.empty() && "can this happen?");
+ if (CP.getDstIdx())
updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
- }
- updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
- SubregToRegSrcInsts);
+ updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
// Shrink subregister ranges if necessary.
if (ShrinkMask.any()) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 11e869a..d70e96938 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4075,18 +4075,11 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
unsigned BitWidth = VT.getScalarSizeInBits();
SDLoc DL(N);
- auto PeekThroughFreeze = [](SDValue N) {
- if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
- return N->getOperand(0);
- return N;
- };
-
if (SDValue V = foldSubCtlzNot<EmptyMatchContext>(N, DAG))
return V;
// fold (sub x, x) -> 0
- // FIXME: Refactor this and xor and other similar operations together.
- if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
+ if (N0 == N1)
return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
// fold (sub c1, c2) -> c3
@@ -6499,19 +6492,21 @@ static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
// It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
// are non NaN values.
if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
- ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
+ ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) {
return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
isFMAXNUMFMINNUM_IEEE
? ISD::FMINNUM_IEEE
: ISD::DELETED_NODE;
- else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
- (OrAndOpcode == ISD::OR)) ||
- ((CC == ISD::SETLT || CC == ISD::SETLE) &&
- (OrAndOpcode == ISD::AND)))
+ }
+
+ if (((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::OR)) ||
+ ((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::AND))) {
return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
isFMAXNUMFMINNUM_IEEE
? ISD::FMAXNUM_IEEE
: ISD::DELETED_NODE;
+ }
+
// Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
// NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
// FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
@@ -6521,24 +6516,24 @@ static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
// we can prove that we do not have any sNaNs, then we can do the
// optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
// cases.
- else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
- (OrAndOpcode == ISD::OR)) ||
- ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
- (OrAndOpcode == ISD::AND)))
+ if (((CC == ISD::SETOLT || CC == ISD::SETOLE) && (OrAndOpcode == ISD::OR)) ||
+ ((CC == ISD::SETUGT || CC == ISD::SETUGE) && (OrAndOpcode == ISD::AND))) {
return isFMAXNUMFMINNUM ? ISD::FMINNUM
- : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
- isFMAXNUMFMINNUM_IEEE
- ? ISD::FMINNUM_IEEE
- : ISD::DELETED_NODE;
- else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
- (OrAndOpcode == ISD::OR)) ||
- ((CC == ISD::SETULT || CC == ISD::SETULE) &&
- (OrAndOpcode == ISD::AND)))
+ : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
+ isFMAXNUMFMINNUM_IEEE
+ ? ISD::FMINNUM_IEEE
+ : ISD::DELETED_NODE;
+ }
+
+ if (((CC == ISD::SETOGT || CC == ISD::SETOGE) && (OrAndOpcode == ISD::OR)) ||
+ ((CC == ISD::SETULT || CC == ISD::SETULE) && (OrAndOpcode == ISD::AND))) {
return isFMAXNUMFMINNUM ? ISD::FMAXNUM
- : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
- isFMAXNUMFMINNUM_IEEE
- ? ISD::FMAXNUM_IEEE
- : ISD::DELETED_NODE;
+ : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
+ isFMAXNUMFMINNUM_IEEE
+ ? ISD::FMAXNUM_IEEE
+ : ISD::DELETED_NODE;
+ }
+
return ISD::DELETED_NODE;
}
@@ -13184,14 +13179,14 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
// select Cond, -1, x → or Cond, x
if (IsTAllOne) {
- SDValue X = DAG.getBitcast(CondVT, FVal);
+ SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, X);
return DAG.getBitcast(VT, Or);
}
// select Cond, x, 0 → and Cond, x
if (IsFAllZero) {
- SDValue X = DAG.getBitcast(CondVT, TVal);
+ SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(TVal));
SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, X);
return DAG.getBitcast(VT, And);
}
@@ -13199,7 +13194,7 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
// select Cond, 0, x -> and not(Cond), x
if (IsTAllZero &&
(isBitwiseNot(peekThroughBitcasts(Cond)) || TLI.hasAndNot(Cond))) {
- SDValue X = DAG.getBitcast(CondVT, FVal);
+ SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
SDValue And =
DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
return DAG.getBitcast(VT, And);
@@ -16754,6 +16749,17 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
return N0;
+ // If we have frozen and unfrozen users of N0, update so everything uses N.
+ if (!N0.isUndef() && !N0.hasOneUse()) {
+ SDValue FrozenN0(N, 0);
+ DAG.ReplaceAllUsesOfValueWith(N0, FrozenN0);
+ // ReplaceAllUsesOfValueWith will have also updated the use in N, thus
+ // creating a cycle in a DAG. Let's undo that by mutating the freeze.
+ assert(N->getOperand(0) == FrozenN0 && "Expected cycle in DAG");
+ DAG.UpdateNodeOperands(N, N0);
+ return FrozenN0;
+ }
+
// We currently avoid folding freeze over SRA/SRL, due to the problems seen
// with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
// example https://reviews.llvm.org/D136529#4120959.
@@ -16807,8 +16813,7 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
SmallSet<SDValue, 8> MaybePoisonOperands;
SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
for (auto [OpNo, Op] : enumerate(N0->ops())) {
- if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
- /*Depth*/ 1))
+ if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly=*/false))
continue;
bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
@@ -22534,6 +22539,56 @@ SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
return SDValue();
}
+static SDValue foldToMaskedStore(StoreSDNode *Store, SelectionDAG &DAG,
+ const SDLoc &Dl) {
+ if (!Store->isSimple() || !ISD::isNormalStore(Store))
+ return SDValue();
+
+ SDValue StoredVal = Store->getValue();
+ SDValue StorePtr = Store->getBasePtr();
+ SDValue StoreOffset = Store->getOffset();
+ EVT VT = Store->getMemoryVT();
+ unsigned AddrSpace = Store->getAddressSpace();
+ Align Alignment = Store->getAlign();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (!TLI.isOperationLegalOrCustom(ISD::MSTORE, VT) ||
+ !TLI.allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment))
+ return SDValue();
+
+ SDValue Mask, OtherVec, LoadCh;
+ unsigned LoadPos;
+ if (sd_match(StoredVal,
+ m_VSelect(m_Value(Mask), m_Value(OtherVec),
+ m_Load(m_Value(LoadCh), m_Specific(StorePtr),
+ m_Specific(StoreOffset))))) {
+ LoadPos = 2;
+ } else if (sd_match(StoredVal,
+ m_VSelect(m_Value(Mask),
+ m_Load(m_Value(LoadCh), m_Specific(StorePtr),
+ m_Specific(StoreOffset)),
+ m_Value(OtherVec)))) {
+ LoadPos = 1;
+ } else {
+ return SDValue();
+ }
+
+ auto *Load = cast<LoadSDNode>(StoredVal.getOperand(LoadPos));
+ if (!Load->isSimple() || !ISD::isNormalLoad(Load) ||
+ Load->getAddressSpace() != AddrSpace)
+ return SDValue();
+
+ if (!Store->getChain().reachesChainWithoutSideEffects(LoadCh))
+ return SDValue();
+
+ if (LoadPos == 1)
+ Mask = DAG.getNOT(Dl, Mask, Mask.getValueType());
+
+ return DAG.getMaskedStore(Store->getChain(), Dl, OtherVec, StorePtr,
+ StoreOffset, Mask, VT, Store->getMemOperand(),
+ Store->getAddressingMode());
+}
+
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
@@ -22768,6 +22823,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (SDValue NewSt = splitMergedValStore(ST))
return NewSt;
+ if (SDValue MaskedStore = foldToMaskedStore(ST, DAG, SDLoc(N)))
+ return MaskedStore;
+
return ReduceLoadOpStoreWidth(N);
}
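
foldToMaskedStore above rewrites store(vselect(mask, x, load(p)), p) into a masked store of x (the other operand order just inverts the mask). A plain C++ model of why the two forms are equivalent lane by lane; scalar arrays stand in for vector lanes, so this is a semantic sketch rather than DAG code:

  #include <array>
  #include <cassert>

  int main() {
    std::array<int, 4> Mem = {1, 2, 3, 4};
    std::array<int, 4> X = {10, 20, 30, 40};
    std::array<bool, 4> Mask = {true, false, true, false};

    // store (vselect Mask, X, (load p)), p
    std::array<int, 4> AfterStore;
    for (int i = 0; i < 4; ++i)
      AfterStore[i] = Mask[i] ? X[i] : Mem[i];

    // masked_store X, p, Mask -- only the selected lanes are written.
    std::array<int, 4> AfterMasked = Mem;
    for (int i = 0; i < 4; ++i)
      if (Mask[i])
        AfterMasked[i] = X[i];

    assert(AfterStore == AfterMasked);
    return 0;
  }
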
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 583a85a..a5bd97a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2217,8 +2217,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
switch (getTypeAction(InVT)) {
case TargetLowering::TypePromoteInteger: {
- // TODO: Handle big endian
- if (OutVT.isVector() && DAG.getDataLayout().isLittleEndian()) {
+ // TODO: Handle big endian & vector input type.
+ if (OutVT.isVector() && !InVT.isVector() &&
+ DAG.getDataLayout().isLittleEndian()) {
EVT EltVT = OutVT.getVectorElementType();
TypeSize EltSize = EltVT.getSizeInBits();
TypeSize NInSize = NInVT.getSizeInBits();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f41b6eb..61f1144 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6351,8 +6351,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
case ISD::FREEZE:
assert(VT == N1.getValueType() && "Unexpected VT!");
- if (isGuaranteedNotToBeUndefOrPoison(N1, /*PoisonOnly*/ false,
- /*Depth*/ 1))
+ if (isGuaranteedNotToBeUndefOrPoison(N1, /*PoisonOnly=*/false))
return N1;
break;
case ISD::TokenFactor:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 306e068..ac0440f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7598,7 +7598,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
if (TM.getOptLevel() == CodeGenOptLevel::None)
return;
- const AllocaInst *LifetimeObject = cast<AllocaInst>(I.getArgOperand(1));
+ const AllocaInst *LifetimeObject = dyn_cast<AllocaInst>(I.getArgOperand(1));
+ if (!LifetimeObject)
+ return;
// First check that the Alloca is static, otherwise it won't have a
// valid frame index.
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 48d6b99..a68f521 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -775,13 +775,6 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
break;
}
- case ISD::FREEZE: {
- SDValue N0 = Op.getOperand(0);
- if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
- /*PoisonOnly=*/false, Depth + 1))
- return N0;
- break;
- }
case ISD::AND: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -5363,10 +5356,25 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
unsigned ShiftBits = AndRHSC.countr_zero();
if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
+ // If using an unsigned shift doesn't yield a legal compare
+ // immediate, try using sra instead.
+ APInt NewC = C1.lshr(ShiftBits);
+ if (NewC.getSignificantBits() <= 64 &&
+ !isLegalICmpImmediate(NewC.getSExtValue())) {
+ APInt SignedC = C1.ashr(ShiftBits);
+ if (SignedC.getSignificantBits() <= 64 &&
+ isLegalICmpImmediate(SignedC.getSExtValue())) {
+ SDValue Shift = DAG.getNode(
+ ISD::SRA, dl, ShValTy, N0.getOperand(0),
+ DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
+ SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
+ }
+ }
SDValue Shift = DAG.getNode(
ISD::SRL, dl, ShValTy, N0.getOperand(0),
DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
- SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
+ SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
}
}
@@ -6482,8 +6490,8 @@ SDValue TargetLowering::buildSDIVPow2WithCMov(
Created.push_back(CMov.getNode());
// Divide by pow2.
- SDValue SRA =
- DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
+ SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov,
+ DAG.getShiftAmountConstant(Lg2, VT, DL));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
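
The SimplifySetCC change above falls back to an arithmetic shift when the logically shifted compare constant is not a legal immediate. A plain C++ check of the underlying equivalence for a 32-bit example, (x & 0xFFFFF000) == 0xFFFFF000: the srl form needs the immediate 0xFFFFF, while the sra form only needs -1. This assumes the usual two's-complement conversion and that >> on a negative int32_t is an arithmetic shift, as on all mainstream targets:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (uint64_t i = 0; i <= 0xFFFFFFFFull; i += 997) { // sample the 32-bit range
      uint32_t x = (uint32_t)i;
      bool Original   = (x & 0xFFFFF000u) == 0xFFFFF000u; // and + cmp
      bool Logical    = (x >> 12) == 0xFFFFFu;             // srl + large immediate
      bool Arithmetic = ((int32_t)x >> 12) == -1;          // sra + small immediate
      assert(Original == Logical && Original == Arithmetic);
    }
    return 0;
  }
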
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 705e046e..9e49ddd 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -993,7 +993,7 @@ static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
MI = MRI.getUniqueVRegDef(MO.getReg());
// And it needs to be in the trace (otherwise, it won't have a depth).
if (!MI || MI->getParent() != &MBB ||
- ((unsigned)MI->getOpcode() != CombineOpc && CombineOpc != 0))
+ (MI->getOpcode() != CombineOpc && CombineOpc != 0))
return false;
// Must only used by the user we combine with.
if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 3c91b0e..9f525ea 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -697,7 +697,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
MaxGluedStoresPerMemcpy = 0;
MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize =
MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4;
- HasMultipleConditionRegisters = false;
HasExtractBitsInsn = false;
JumpIsExpensive = JumpIsExpensiveOverride;
PredictableSelectIsExpensive = false;
@@ -905,6 +904,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::GET_FPENV, VT, Expand);
setOperationAction(ISD::SET_FPENV, VT, Expand);
setOperationAction(ISD::RESET_FPENV, VT, Expand);
+
+ setOperationAction(ISD::MSTORE, VT, Expand);
}
// Most targets ignore the @llvm.prefetch intrinsic.
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index e9172f4..d19ef92 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -402,8 +402,8 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(
const MachineModuleInfo *MMI) const {
SmallString<64> NameData("DW.ref.");
NameData += Sym->getName();
- MCSymbolELF *Label =
- cast<MCSymbolELF>(getContext().getOrCreateSymbol(NameData));
+ auto *Label =
+ static_cast<MCSymbolELF *>(getContext().getOrCreateSymbol(NameData));
Streamer.emitSymbolAttribute(Label, MCSA_Hidden);
Streamer.emitSymbolAttribute(Label, MCSA_Weak);
unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP;
@@ -581,7 +581,8 @@ static const MCSymbolELF *getLinkedToSymbol(const GlobalObject *GO,
auto *VM = cast<ValueAsMetadata>(MD->getOperand(0).get());
auto *OtherGV = dyn_cast<GlobalValue>(VM->getValue());
- return OtherGV ? dyn_cast<MCSymbolELF>(TM.getSymbol(OtherGV)) : nullptr;
+ return OtherGV ? static_cast<const MCSymbolELF *>(TM.getSymbol(OtherGV))
+ : nullptr;
}
static unsigned getEntrySizeForKind(SectionKind Kind) {
@@ -1011,7 +1012,7 @@ MCSection *TargetLoweringObjectFileELF::getSectionForLSDA(
(getContext().getAsmInfo()->useIntegratedAssembler() &&
getContext().getAsmInfo()->binutilsIsAtLeast(2, 36))) {
Flags |= ELF::SHF_LINK_ORDER;
- LinkedToSym = cast<MCSymbolELF>(&FnSym);
+ LinkedToSym = static_cast<const MCSymbolELF *>(&FnSym);
}
// Append the function name as the suffix like GCC, assuming
@@ -2370,9 +2371,10 @@ bool TargetLoweringObjectFileXCOFF::ShouldSetSSPCanaryBitInTB(
MCSymbol *
TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(const MachineFunction *MF) {
- MCSymbol *EHInfoSym = MF->getContext().getOrCreateSymbol(
- "__ehinfo." + Twine(MF->getFunctionNumber()));
- cast<MCSymbolXCOFF>(EHInfoSym)->setEHInfo();
+ auto *EHInfoSym =
+ static_cast<MCSymbolXCOFF *>(MF->getContext().getOrCreateSymbol(
+ "__ehinfo." + Twine(MF->getFunctionNumber())));
+ EHInfoSym->setEHInfo();
return EHInfoSym;
}
@@ -2510,7 +2512,8 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
if (Kind.isText()) {
if (TM.getFunctionSections()) {
- return cast<MCSymbolXCOFF>(getFunctionEntryPointSymbol(GO, TM))
+ return static_cast<const MCSymbolXCOFF *>(
+ getFunctionEntryPointSymbol(GO, TM))
->getRepresentedCsect();
}
return TextSection;
@@ -2713,7 +2716,7 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
const MCSymbol *Sym, const TargetMachine &TM) const {
const XCOFF::StorageMappingClass SMC = [](const MCSymbol *Sym,
const TargetMachine &TM) {
- const MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(Sym);
+ auto *XSym = static_cast<const MCSymbolXCOFF *>(Sym);
// The "_$TLSML" symbol for TLS local-dynamic mode requires XMC_TC,
// otherwise the AIX assembler will complain.
@@ -2737,8 +2740,8 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
}(Sym, TM);
return getContext().getXCOFFSection(
- cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), SectionKind::getData(),
- XCOFF::CsectProperties(SMC, XCOFF::XTY_SD));
+ static_cast<const MCSymbolXCOFF *>(Sym)->getSymbolTableName(),
+ SectionKind::getData(), XCOFF::CsectProperties(SMC, XCOFF::XTY_SD));
}
MCSection *TargetLoweringObjectFileXCOFF::getSectionForLSDA(
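The change that repeats through this file, and through the MC changes further down, is swapping llvm::cast<> for static_cast where the object-file format of the context already fixes the symbol's concrete type. A minimal standalone sketch of the trade-off (not part of the patch; Sym and ELFSym are hypothetical stand-ins for the MCSymbol hierarchy):

#include "llvm/Support/Casting.h"

struct Sym {
  enum SymKind { SK_ELF, SK_Other };
  SymKind Kind;
  explicit Sym(SymKind K) : Kind(K) {}
};

struct ELFSym : Sym {
  unsigned Binding = 0;
  ELFSym() : Sym(SK_ELF) {}
  static bool classof(const Sym *S) { return S->Kind == SK_ELF; }
};

// Checked downcast: asserts through ELFSym::classof() in debug builds.
unsigned bindingChecked(Sym &S) { return llvm::cast<ELFSym>(S).Binding; }

// Unchecked downcast: valid only when the surrounding code (here, the
// object-file format owned by the MCContext) already guarantees the type.
unsigned bindingUnchecked(Sym &S) { return static_cast<ELFSym &>(S).Binding; }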
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 6d0a94d..73df62a 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -2260,7 +2260,7 @@ public:
continue;
}
- if (Section.relocation_begin() == Section.relocation_end())
+ if (Section.relocations().empty())
continue;
// Symbol to [address, section index] cache mapping.
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
index 308dac4..09ac0f1 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
@@ -261,7 +261,7 @@ private:
// Skip relocations in virtual sections.
if (S.isVirtual()) {
- if (S.relocation_begin() != S.relocation_end())
+ if (!S.relocations().empty())
return make_error<JITLinkError>("Virtual section contains "
"relocations");
continue;
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
index bb5f3ab..27209a8 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
@@ -236,7 +236,7 @@ private:
// Skip relocations in virtual sections.
if (S.isVirtual()) {
- if (S.relocation_begin() != S.relocation_end())
+ if (!S.relocations().empty())
return make_error<JITLinkError>("Virtual section contains "
"relocations");
continue;
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index a8559e7..6de6cc7 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -891,7 +891,7 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
// Align DataSize to stub alignment if we have any stubs (PaddingSize will
// have been increased above to account for this).
if (StubBufSize > 0)
- DataSize &= -(uint64_t)getStubAlignment().value();
+ DataSize &= -getStubAlignment().value();
}
LLVM_DEBUG(dbgs() << "emitSection SectionID: " << SectionID << " Name: "
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index ffc7696..08d6c78 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -2822,7 +2822,7 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj,
// object's sections to GOTs.
for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end();
SI != SE; ++SI) {
- if (SI->relocation_begin() != SI->relocation_end()) {
+ if (!SI->relocations().empty()) {
Expected<section_iterator> RelSecOrErr = SI->getRelocatedSection();
if (!RelSecOrErr)
return make_error<RuntimeDyldError>(
diff --git a/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp b/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
index 48ff1ca..6d89fa7 100644
--- a/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
+++ b/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
@@ -270,11 +270,11 @@ Error MetadataParser::parseRootConstants(mcdxbc::RootSignatureDesc &RSD,
Error MetadataParser::parseRootDescriptors(
mcdxbc::RootSignatureDesc &RSD, MDNode *RootDescriptorNode,
RootSignatureElementKind ElementKind) {
- assert(ElementKind == RootSignatureElementKind::SRV ||
- ElementKind == RootSignatureElementKind::UAV ||
- ElementKind == RootSignatureElementKind::CBV &&
- "parseRootDescriptors should only be called with RootDescriptor "
- "element kind.");
+ assert((ElementKind == RootSignatureElementKind::SRV ||
+ ElementKind == RootSignatureElementKind::UAV ||
+ ElementKind == RootSignatureElementKind::CBV) &&
+ "parseRootDescriptors should only be called with RootDescriptor "
+ "element kind.");
if (RootDescriptorNode->getNumOperands() != 5)
return make_error<InvalidRSMetadataFormat>("Root Descriptor Element");
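The fix above only adds parentheses: '&&' binds tighter than '||', so the old condition parsed as A || B || (C && "msg"). Because a string literal is always true the value is unchanged, but compilers warn (-Wparentheses) and the grouping did not reflect the intent. A minimal illustration of the intended form:

#include <cassert>

void checkKind(int Kind) {
  // Message attached to the whole alternative list, as in the patched assert.
  assert((Kind == 1 || Kind == 2 || Kind == 3) && "unexpected element kind");
  (void)Kind; // keep -DNDEBUG builds warning-free
}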
diff --git a/llvm/lib/Frontend/Offloading/CMakeLists.txt b/llvm/lib/Frontend/Offloading/CMakeLists.txt
index 8e1ede9..9747dbd 100644
--- a/llvm/lib/Frontend/Offloading/CMakeLists.txt
+++ b/llvm/lib/Frontend/Offloading/CMakeLists.txt
@@ -1,6 +1,7 @@
add_llvm_component_library(LLVMFrontendOffloading
Utility.cpp
OffloadWrapper.cpp
+ PropertySet.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/Frontend
diff --git a/llvm/lib/Frontend/Offloading/PropertySet.cpp b/llvm/lib/Frontend/Offloading/PropertySet.cpp
new file mode 100644
index 0000000..a70290d
--- /dev/null
+++ b/llvm/lib/Frontend/Offloading/PropertySet.cpp
@@ -0,0 +1,102 @@
+//===- llvm/Frontend/Offloading/PropertySet.cpp ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Frontend/Offloading/PropertySet.h"
+#include "llvm/Support/Base64.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/MemoryBufferRef.h"
+
+using namespace llvm;
+using namespace llvm::offloading;
+
+void llvm::offloading::writePropertiesToJSON(
+ const PropertySetRegistry &PSRegistry, raw_ostream &Out) {
+ json::OStream J(Out);
+ J.object([&] {
+ for (const auto &[CategoryName, PropSet] : PSRegistry) {
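+      // Copy the binding: lambdas cannot capture structured bindings before
+      // C++20, and the attributeObject() callback below needs PropSet.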
+ auto PropSetCapture = PropSet;
+ J.attributeObject(CategoryName, [&] {
+ for (const auto &[PropName, PropVal] : PropSetCapture) {
+ switch (PropVal.index()) {
+ case 0:
+ J.attribute(PropName, std::get<uint32_t>(PropVal));
+ break;
+ case 1:
+ J.attribute(PropName, encodeBase64(std::get<ByteArray>(PropVal)));
+ break;
+ default:
+ llvm_unreachable("unsupported property type");
+ }
+ }
+ });
+ }
+ });
+}
+
+// note: createStringError has an overload that takes a format string,
+// but it uses llvm::format instead of llvm::formatv, which does
+// not work with json::Value. This is a helper function to use
+// llvm::formatv with createStringError.
+template <typename... Ts> auto createStringErrorV(Ts &&...Args) {
+ return createStringError(formatv(std::forward<Ts>(Args)...));
+}
+
+Expected<PropertyValue>
+readPropertyValueFromJSON(const json::Value &PropValueVal) {
+ if (std::optional<uint64_t> Val = PropValueVal.getAsUINT64())
+ return PropertyValue(static_cast<uint32_t>(*Val));
+
+ if (std::optional<StringRef> Val = PropValueVal.getAsString()) {
+ std::vector<char> Decoded;
+ if (Error E = decodeBase64(*Val, Decoded))
+ return createStringErrorV("unable to base64 decode the string {0}: {1}",
+ Val, toString(std::move(E)));
+ return PropertyValue(ByteArray(Decoded.begin(), Decoded.end()));
+ }
+
+ return createStringErrorV("expected a uint64 or a string, got {0}",
+ PropValueVal);
+}
+
+Expected<PropertySetRegistry>
+llvm::offloading::readPropertiesFromJSON(MemoryBufferRef Buf) {
+ PropertySetRegistry Res;
+ Expected<json::Value> V = json::parse(Buf.getBuffer());
+ if (Error E = V.takeError())
+ return E;
+
+ const json::Object *O = V->getAsObject();
+ if (!O)
+ return createStringErrorV(
+ "error while deserializing property set registry: "
+ "expected JSON object, got {0}",
+ *V);
+
+ for (const auto &[CategoryName, Value] : *O) {
+ const json::Object *PropSetVal = Value.getAsObject();
+ if (!PropSetVal)
+ return createStringErrorV("error while deserializing property set {0}: "
+ "expected JSON array, got {1}",
+ CategoryName.str(), Value);
+
+ PropertySet &PropSet = Res[CategoryName.str()];
+ for (const auto &[PropName, PropValueVal] : *PropSetVal) {
+ Expected<PropertyValue> Prop = readPropertyValueFromJSON(PropValueVal);
+ if (Error E = Prop.takeError())
+ return createStringErrorV(
+ "error while deserializing property {0} in property set {1}: {2}",
+ PropName.str(), CategoryName.str(), toString(std::move(E)));
+
+ auto [It, Inserted] =
+ PropSet.try_emplace(PropName.str(), std::move(*Prop));
+ assert(Inserted && "Property already exists in PropertySet");
+ (void)Inserted;
+ }
+ }
+ return Res;
+}
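A hypothetical round trip for the two entry points defined above. It assumes the header declares PropertySetRegistry/PropertySet as nested string maps and PropertyValue as constructible from uint32_t, which is what the parser code suggests; the category and property names are made up.

#include "llvm/Frontend/Offloading/PropertySet.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

static llvm::Expected<llvm::offloading::PropertySetRegistry> roundTrip() {
  using namespace llvm;
  using namespace llvm::offloading;

  PropertySetRegistry Registry;
  // Hypothetical category and property names, for illustration only.
  Registry["example/category"].try_emplace("flag_word", PropertyValue(42u));

  std::string JSON;
  raw_string_ostream OS(JSON);
  writePropertiesToJSON(Registry, OS);
  OS.flush();

  // Parse the serialized form back into a registry (or return an Error).
  return readPropertiesFromJSON(MemoryBufferRef(JSON, "properties.json"));
}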
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index e5a4e1e..dc6d599 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -1163,7 +1163,7 @@ int SlotTracker::processIndex() {
std::vector<StringRef> ModulePaths;
for (auto &[ModPath, _] : TheIndex->modulePaths())
ModulePaths.push_back(ModPath);
- llvm::sort(ModulePaths.begin(), ModulePaths.end());
+ llvm::sort(ModulePaths);
for (auto &ModPath : ModulePaths)
CreateModulePathSlot(ModPath);
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 763cc18..b7cd12a 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -942,14 +942,13 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I,
// We have two instructions of identical opcode and #operands. Check to see
// if all operands are the same.
- if (!std::equal(op_begin(), op_end(), I->op_begin()))
+ if (!equal(operands(), I->operands()))
return false;
// WARNING: this logic must be kept in sync with EliminateDuplicatePHINodes()!
- if (const PHINode *thisPHI = dyn_cast<PHINode>(this)) {
- const PHINode *otherPHI = cast<PHINode>(I);
- return std::equal(thisPHI->block_begin(), thisPHI->block_end(),
- otherPHI->block_begin());
+ if (const PHINode *Phi = dyn_cast<PHINode>(this)) {
+ const PHINode *OtherPhi = cast<PHINode>(I);
+ return equal(Phi->blocks(), OtherPhi->blocks());
}
return this->hasSameSpecialState(I, /*IgnoreAlignment=*/false,
diff --git a/llvm/lib/IR/Mangler.cpp b/llvm/lib/IR/Mangler.cpp
index 010bd15..ca6a480 100644
--- a/llvm/lib/IR/Mangler.cpp
+++ b/llvm/lib/IR/Mangler.cpp
@@ -292,6 +292,9 @@ void llvm::emitLinkerFlagsForUsedCOFF(raw_ostream &OS, const GlobalValue *GV,
}
std::optional<std::string> llvm::getArm64ECMangledFunctionName(StringRef Name) {
+ assert(!Name.empty() &&
+ "getArm64ECMangledFunctionName requires non-empty name");
+
if (Name[0] != '?') {
// For non-C++ symbols, prefix the name with "#" unless it's already
// mangled.
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index 5936ac7..a8e6c79 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -8,6 +8,9 @@
#include "llvm/IR/RuntimeLibcalls.h"
#include "llvm/ADT/StringTable.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "runtime-libcalls-info"
using namespace llvm;
using namespace RTLIB;
@@ -44,11 +47,9 @@ static void setARMLibcallNames(RuntimeLibcallsInfo &Info, const Triple &TT,
RTLIB::__aeabi_ui2f, RTLIB::__aeabi_l2f,
RTLIB::__aeabi_ul2f, RTLIB::__aeabi_lmul,
RTLIB::__aeabi_llsl, RTLIB::__aeabi_llsr,
- RTLIB::__aeabi_lasr, RTLIB::__aeabi_idiv__i8,
- RTLIB::__aeabi_idiv__i16, RTLIB::__aeabi_idiv__i32,
+ RTLIB::__aeabi_lasr, RTLIB::__aeabi_idiv,
RTLIB::__aeabi_idivmod, RTLIB::__aeabi_uidivmod,
- RTLIB::__aeabi_ldivmod, RTLIB::__aeabi_uidiv__i8,
- RTLIB::__aeabi_uidiv__i16, RTLIB::__aeabi_uidiv__i32,
+ RTLIB::__aeabi_ldivmod, RTLIB::__aeabi_uidiv,
RTLIB::__aeabi_uldivmod, RTLIB::__aeabi_f2h,
RTLIB::__aeabi_d2h, RTLIB::__aeabi_h2f,
RTLIB::__aeabi_memcpy, RTLIB::__aeabi_memmove,
@@ -62,12 +63,6 @@ static void setARMLibcallNames(RuntimeLibcallsInfo &Info, const Triple &TT,
Info.setLibcallImplCallingConv(Impl, CallingConv::ARM_AAPCS);
}
-void RTLIB::RuntimeLibcallsInfo::initDefaultLibCallImpls() {
- std::memcpy(LibcallImpls, DefaultLibcallImpls, sizeof(LibcallImpls));
- static_assert(sizeof(LibcallImpls) == sizeof(DefaultLibcallImpls),
- "libcall array size should match");
-}
-
/// Set default libcall names. If a target wants to opt out of a libcall, it
/// should be placed here.
void RuntimeLibcallsInfo::initLibcalls(const Triple &TT,
@@ -76,59 +71,14 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT,
EABI EABIVersion, StringRef ABIName) {
setTargetRuntimeLibcallSets(TT, FloatABI);
- // Early exit for targets that have fully ported to tablegen.
- if (TT.isAMDGPU() || TT.isNVPTX() || TT.isWasm())
- return;
-
- if (TT.isX86() || TT.isVE() || TT.isARM() || TT.isThumb()) {
- if (ExceptionModel == ExceptionHandling::SjLj)
- setLibcallImpl(RTLIB::UNWIND_RESUME, RTLIB::_Unwind_SjLj_Resume);
- }
-
- // A few names are different on particular architectures or environments.
- if (TT.isOSDarwin()) {
- // For f16/f32 conversions, Darwin uses the standard naming scheme,
- // instead of the gnueabi-style __gnu_*_ieee.
- // FIXME: What about other targets?
- setLibcallImpl(RTLIB::FPEXT_F16_F32, RTLIB::__extendhfsf2);
- setLibcallImpl(RTLIB::FPROUND_F32_F16, RTLIB::__truncsfhf2);
-
- if (!darwinHasExp10(TT)) {
- setLibcallImpl(RTLIB::EXP10_F32, RTLIB::Unsupported);
- setLibcallImpl(RTLIB::EXP10_F64, RTLIB::Unsupported);
- }
- }
-
- if (TT.isOSOpenBSD()) {
- setLibcallImpl(RTLIB::STACKPROTECTOR_CHECK_FAIL, RTLIB::Unsupported);
- setLibcallImpl(RTLIB::STACK_SMASH_HANDLER, RTLIB::__stack_smash_handler);
- }
-
- // Skip default manual processing for targets that have been fully ported to
- // tablegen for now. Eventually the rest of this should be deleted.
- if (TT.isX86() || TT.isAArch64() || TT.isWasm())
- return;
+ if (ExceptionModel == ExceptionHandling::SjLj)
+ setLibcallImpl(RTLIB::UNWIND_RESUME, RTLIB::_Unwind_SjLj_Resume);
if (TT.isARM() || TT.isThumb()) {
setARMLibcallNames(*this, TT, FloatABI, EABIVersion);
return;
}
- if (hasSinCos(TT)) {
- setLibcallImpl(RTLIB::SINCOS_F32, RTLIB::sincosf);
- setLibcallImpl(RTLIB::SINCOS_F64, RTLIB::sincos);
- setLibcallImpl(RTLIB::SINCOS_F128, RTLIB::sincos_f128);
- }
-
- // These libcalls are only available in compiler-rt, not libgcc.
- if (TT.isArch64Bit()) {
- setLibcallImpl(RTLIB::SHL_I128, RTLIB::__ashlti3);
- setLibcallImpl(RTLIB::SRL_I128, RTLIB::__lshrti3);
- setLibcallImpl(RTLIB::SRA_I128, RTLIB::__ashrti3);
- setLibcallImpl(RTLIB::MUL_I128, RTLIB::__multi3);
- setLibcallImpl(RTLIB::MULO_I64, RTLIB::__mulodi4);
- }
-
if (TT.getArch() == Triple::ArchType::msp430) {
setLibcallImplCallingConv(RTLIB::__mspabi_mpyll,
CallingConv::MSP430_BUILTIN);
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 3ff9895..ca3f148 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6769,10 +6769,13 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
break;
}
case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- Check(isa<AllocaInst>(Call.getArgOperand(1)),
- "llvm.lifetime.start/end can only be used on alloca", &Call);
+ case Intrinsic::lifetime_end: {
+ Value *Ptr = Call.getArgOperand(1);
+ Check(isa<AllocaInst>(Ptr) || isa<PoisonValue>(Ptr),
+ "llvm.lifetime.start/end can only be used on alloca or poison",
+ &Call);
break;
+ }
};
// Verify that there aren't any unmediated control transfers between funclets.
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index ae8dffc..8f3814a 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -401,16 +401,15 @@ static bool isIFunc(const MCSymbolELF *Symbol) {
mergeTypeForSet(Symbol->getType(), ELF::STT_GNU_IFUNC) !=
ELF::STT_GNU_IFUNC)
return false;
- Symbol = &cast<MCSymbolELF>(Value->getSymbol());
+ Symbol = &static_cast<const MCSymbolELF &>(Value->getSymbol());
}
return true;
}
void ELFWriter::writeSymbol(SymbolTableWriter &Writer, uint32_t StringIndex,
ELFSymbolData &MSD) {
- const auto &Symbol = cast<MCSymbolELF>(*MSD.Symbol);
- const MCSymbolELF *Base =
- cast_or_null<MCSymbolELF>(Asm.getBaseSymbol(Symbol));
+ auto &Symbol = static_cast<const MCSymbolELF &>(*MSD.Symbol);
+ auto *Base = static_cast<const MCSymbolELF *>(Asm.getBaseSymbol(Symbol));
// This has to be in sync with when computeSymbolTable uses SHN_ABS or
// SHN_COMMON.
@@ -446,7 +445,7 @@ void ELFWriter::writeSymbol(SymbolTableWriter &Writer, uint32_t StringIndex,
const MCSymbolELF *Sym = &Symbol;
while (Sym->isVariable()) {
if (auto *Expr = dyn_cast<MCSymbolRefExpr>(Sym->getVariableValue())) {
- Sym = cast<MCSymbolELF>(&Expr->getSymbol());
+ Sym = static_cast<const MCSymbolELF *>(&Expr->getSymbol());
if (!Sym->getSize())
continue;
ESize = Sym->getSize();
@@ -523,7 +522,7 @@ void ELFWriter::computeSymbolTable(const RevGroupMapTy &RevGroupMap) {
// Add the data for the symbols.
bool HasLargeSectionIndex = false;
for (auto It : llvm::enumerate(Asm.symbols())) {
- const auto &Symbol = cast<MCSymbolELF>(It.value());
+ auto &Symbol = static_cast<const MCSymbolELF &>(It.value());
if (!isInSymtab(Symbol))
continue;
@@ -533,7 +532,7 @@ void ELFWriter::computeSymbolTable(const RevGroupMapTy &RevGroupMap) {
}
ELFSymbolData MSD;
- MSD.Symbol = cast<MCSymbolELF>(&Symbol);
+ MSD.Symbol = static_cast<const MCSymbolELF *>(&Symbol);
MSD.Order = It.index();
bool Local = Symbol.getBinding() == ELF::STB_LOCAL;
@@ -1175,7 +1174,7 @@ void ELFObjectWriter::executePostLayoutBinding() {
// versions declared with @@@ to be renamed.
for (const Symver &S : Symvers) {
StringRef AliasName = S.Name;
- const auto &Symbol = cast<MCSymbolELF>(*S.Sym);
+ auto &Symbol = static_cast<const MCSymbolELF &>(*S.Sym);
size_t Pos = AliasName.find('@');
assert(Pos != StringRef::npos);
@@ -1185,8 +1184,8 @@ void ELFObjectWriter::executePostLayoutBinding() {
if (Rest.starts_with("@@@"))
Tail = Rest.substr(Symbol.isUndefined() ? 2 : 1);
- auto *Alias =
- cast<MCSymbolELF>(Asm->getContext().getOrCreateSymbol(Prefix + Tail));
+ auto *Alias = static_cast<MCSymbolELF *>(
+ Asm->getContext().getOrCreateSymbol(Prefix + Tail));
Asm->registerSymbol(*Alias);
const MCExpr *Value = MCSymbolRefExpr::create(&Symbol, Asm->getContext());
Alias->setVariableValue(Value);
@@ -1218,7 +1217,8 @@ void ELFObjectWriter::executePostLayoutBinding() {
}
for (const MCSymbol *&Sym : AddrsigSyms) {
- if (const MCSymbol *R = Renames.lookup(cast<MCSymbolELF>(Sym)))
+ if (const MCSymbol *R =
+ Renames.lookup(static_cast<const MCSymbolELF *>(Sym)))
Sym = R;
if (Sym->isInSection() && Sym->getName().starts_with(".L"))
Sym = Sym->getSection().getBeginSymbol();
@@ -1234,7 +1234,7 @@ void ELFObjectWriter::executePostLayoutBinding() {
continue;
auto *Expr = Alias->getVariableValue();
if (const auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr)) {
- auto &Sym = cast<MCSymbolELF>(Inner->getSymbol());
+ auto &Sym = static_cast<const MCSymbolELF &>(Inner->getSymbol());
if (Asm->registerSymbol(Sym))
Sym.setBinding(ELF::STB_WEAK);
}
@@ -1316,7 +1316,7 @@ void ELFObjectWriter::recordRelocation(const MCFragment &F,
auto &Section = static_cast<const MCSectionELF &>(*F.getParent());
MCContext &Ctx = getContext();
- const auto *SymA = cast_or_null<MCSymbolELF>(Target.getAddSym());
+ auto *SymA = static_cast<const MCSymbolELF *>(Target.getAddSym());
const MCSectionELF *SecA =
(SymA && SymA->isInSection())
? static_cast<const MCSectionELF *>(&SymA->getSection())
@@ -1328,7 +1328,7 @@ void ELFObjectWriter::recordRelocation(const MCFragment &F,
uint64_t FixupOffset = Asm->getFragmentOffset(F) + Fixup.getOffset();
uint64_t Addend = Target.getConstant();
if (auto *RefB = Target.getSubSym()) {
- const auto &SymB = cast<MCSymbolELF>(*RefB);
+ auto &SymB = static_cast<const MCSymbolELF &>(*RefB);
if (SymB.isUndefined()) {
Ctx.reportError(Fixup.getLoc(),
Twine("symbol '") + SymB.getName() +
@@ -1363,7 +1363,7 @@ void ELFObjectWriter::recordRelocation(const MCFragment &F,
!mc::isRelocRelocation(Fixup.getKind());
if (UseSectionSym && useSectionSymbol(Target, SymA, Addend, Type)) {
Addend += Asm->getSymbolOffset(*SymA);
- SymA = cast<MCSymbolELF>(SecA->getBeginSymbol());
+ SymA = static_cast<const MCSymbolELF *>(SecA->getBeginSymbol());
} else if (const MCSymbolELF *R = Renames.lookup(SymA)) {
SymA = R;
}
@@ -1383,7 +1383,7 @@ bool ELFObjectWriter::usesRela(const MCTargetOptions *TO,
bool ELFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
const MCSymbol &SA, const MCFragment &FB, bool InSet, bool IsPCRel) const {
- const auto &SymA = cast<MCSymbolELF>(SA);
+ auto &SymA = static_cast<const MCSymbolELF &>(SA);
if (IsPCRel) {
assert(!InSet);
if (SymA.getBinding() != ELF::STB_LOCAL ||
diff --git a/llvm/lib/MC/GOFFObjectWriter.cpp b/llvm/lib/MC/GOFFObjectWriter.cpp
index 88188f3..3b629cd 100644
--- a/llvm/lib/MC/GOFFObjectWriter.cpp
+++ b/llvm/lib/MC/GOFFObjectWriter.cpp
@@ -345,7 +345,7 @@ void GOFFWriter::defineSymbols() {
for (const MCSymbol &Sym : Asm.symbols()) {
if (Sym.isTemporary())
continue;
- auto &Symbol = cast<MCSymbolGOFF>(Sym);
+ auto &Symbol = static_cast<const MCSymbolGOFF &>(Sym);
if (Symbol.hasLDAttributes()) {
Symbol.setIndex(++Ordinal);
defineLabel(Symbol);
diff --git a/llvm/lib/MC/MCAsmBackend.cpp b/llvm/lib/MC/MCAsmBackend.cpp
index 828d9cf..55ec4a6 100644
--- a/llvm/lib/MC/MCAsmBackend.cpp
+++ b/llvm/lib/MC/MCAsmBackend.cpp
@@ -8,6 +8,7 @@
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDXContainerWriter.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCGOFFObjectWriter.h"
@@ -122,14 +123,12 @@ void MCAsmBackend::maybeAddReloc(const MCFragment &F, const MCFixup &Fixup,
}
bool MCAsmBackend::isDarwinCanonicalPersonality(const MCSymbol *Sym) const {
+ assert(getContext().isMachO());
// Consider a NULL personality (i.e., no personality encoding) to be canonical
// because it's always at 0.
if (!Sym)
return true;
- if (!Sym->isMachO())
- llvm_unreachable("Expected MachO symbols only");
-
StringRef name = Sym->getName();
// XXX: We intentionally leave out "___gcc_personality_v0" because, despite
// being system-defined like these two, it is not very commonly-used.
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index da51da4..93614cd 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -897,14 +897,14 @@ void MCAsmStreamer::emitXCOFFLocalCommonSymbol(MCSymbol *LabelSym,
// Print symbol's rename (original name contains invalid character(s)) if
// there is one.
- MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(CsectSym);
+ auto *XSym = static_cast<MCSymbolXCOFF *>(CsectSym);
if (XSym->hasRename())
emitXCOFFRenameDirective(XSym, XSym->getSymbolTableName());
}
void MCAsmStreamer::emitXCOFFSymbolLinkageWithVisibility(
MCSymbol *Symbol, MCSymbolAttr Linkage, MCSymbolAttr Visibility) {
-
+ auto &Sym = static_cast<MCSymbolXCOFF &>(*Symbol);
switch (Linkage) {
case MCSA_Global:
OS << MAI->getGlobalDirective();
@@ -944,9 +944,8 @@ void MCAsmStreamer::emitXCOFFSymbolLinkageWithVisibility(
// Print symbol's rename (original name contains invalid character(s)) if
// there is one.
- if (cast<MCSymbolXCOFF>(Symbol)->hasRename())
- emitXCOFFRenameDirective(Symbol,
- cast<MCSymbolXCOFF>(Symbol)->getSymbolTableName());
+ if (Sym.hasRename())
+ emitXCOFFRenameDirective(&Sym, Sym.getSymbolTableName());
}
void MCAsmStreamer::emitXCOFFRenameDirective(const MCSymbol *Name,
@@ -1070,9 +1069,11 @@ void MCAsmStreamer::emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
// Print symbol's rename (original name contains invalid character(s)) if
// there is one.
- MCSymbolXCOFF *XSym = dyn_cast<MCSymbolXCOFF>(Symbol);
- if (XSym && XSym->hasRename())
- emitXCOFFRenameDirective(XSym, XSym->getSymbolTableName());
+ if (getContext().isXCOFF()) {
+ auto *XSym = static_cast<MCSymbolXCOFF *>(Symbol);
+ if (XSym && XSym->hasRename())
+ emitXCOFFRenameDirective(XSym, XSym->getSymbolTableName());
+ }
}
void MCAsmStreamer::emitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index 8500fd1..d172ad1 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -59,7 +59,8 @@ STATISTIC(EmittedFillFragments,
"Number of emitted assembler fragments - fill");
STATISTIC(EmittedNopsFragments, "Number of emitted assembler fragments - nops");
STATISTIC(EmittedOrgFragments, "Number of emitted assembler fragments - org");
-STATISTIC(evaluateFixup, "Number of evaluated fixups");
+STATISTIC(Fixups, "Number of fixups");
+STATISTIC(FixupEvalForRelax, "Number of fixup evaluations for relaxation");
STATISTIC(ObjectBytes, "Number of emitted object file bytes");
STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps");
STATISTIC(RelaxedInstructions, "Number of relaxed instructions");
@@ -140,9 +141,9 @@ bool MCAssembler::isThumbFunc(const MCSymbol *Symbol) const {
bool MCAssembler::evaluateFixup(const MCFragment &F, MCFixup &Fixup,
MCValue &Target, uint64_t &Value,
- bool RecordReloc,
- MutableArrayRef<char> Contents) const {
- ++stats::evaluateFixup;
+ bool RecordReloc, uint8_t *Data) const {
+ if (RecordReloc)
+ ++stats::Fixups;
// FIXME: This code has some duplication with recordRelocation. We should
// probably merge the two into a single callback that tries to evaluate a
@@ -185,7 +186,7 @@ bool MCAssembler::evaluateFixup(const MCFragment &F, MCFixup &Fixup,
if (IsResolved && mc::isRelocRelocation(Fixup.getKind()))
IsResolved = false;
- getBackend().applyFixup(F, Fixup, Target, Contents, Value, IsResolved);
+ getBackend().applyFixup(F, Fixup, Target, Data, Value, IsResolved);
return true;
}
@@ -703,21 +704,25 @@ void MCAssembler::layout() {
for (MCFixup &Fixup : F.getFixups()) {
uint64_t FixedValue;
MCValue Target;
+ assert(mc::isRelocRelocation(Fixup.getKind()) ||
+ Fixup.getOffset() <= F.getFixedSize());
+ auto *Data =
+ reinterpret_cast<uint8_t *>(Contents.data() + Fixup.getOffset());
evaluateFixup(F, Fixup, Target, FixedValue,
- /*RecordReloc=*/true, Contents);
+ /*RecordReloc=*/true, Data);
}
- if (F.getVarFixups().size()) {
- // In the variable part, fixup offsets are relative to the fixed part's
- // start. Extend the variable contents to the left to account for the
- // fixed part size.
- Contents = MutableArrayRef(F.getParent()->ContentStorage)
- .slice(F.VarContentStart - Contents.size(), F.getSize());
- for (MCFixup &Fixup : F.getVarFixups()) {
- uint64_t FixedValue;
- MCValue Target;
- evaluateFixup(F, Fixup, Target, FixedValue,
- /*RecordReloc=*/true, Contents);
- }
+ // In the variable part, fixup offsets are relative to the fixed part's
+ // start.
+ for (MCFixup &Fixup : F.getVarFixups()) {
+ uint64_t FixedValue;
+ MCValue Target;
+ assert(mc::isRelocRelocation(Fixup.getKind()) ||
+ (Fixup.getOffset() >= F.getFixedSize() &&
+ Fixup.getOffset() <= F.getSize()));
+ auto *Data = reinterpret_cast<uint8_t *>(
+ F.getVarContents().data() + (Fixup.getOffset() - F.getFixedSize()));
+ evaluateFixup(F, Fixup, Target, FixedValue,
+ /*RecordReloc=*/true, Data);
}
}
}
@@ -735,7 +740,7 @@ void MCAssembler::Finish() {
bool MCAssembler::fixupNeedsRelaxation(const MCFragment &F,
const MCFixup &Fixup) const {
- assert(getBackendPtr() && "Expected assembler backend");
+ ++stats::FixupEvalForRelax;
MCValue Target;
uint64_t Value;
bool Resolved = evaluateFixup(F, const_cast<MCFixup &>(Fixup), Target, Value,
@@ -940,6 +945,14 @@ bool MCAssembler::relaxFill(MCFillFragment &F) {
return true;
}
+bool MCAssembler::relaxOrg(MCOrgFragment &F) {
+ uint64_t Size = computeFragmentSize(F);
+ if (F.getSize() == Size)
+ return false;
+ F.setSize(Size);
+ return true;
+}
+
bool MCAssembler::relaxFragment(MCFragment &F) {
switch(F.getKind()) {
default:
@@ -961,6 +974,8 @@ bool MCAssembler::relaxFragment(MCFragment &F) {
return relaxCVDefRange(cast<MCCVDefRangeFragment>(F));
case MCFragment::FT_Fill:
return relaxFill(cast<MCFillFragment>(F));
+ case MCFragment::FT_Org:
+ return relaxOrg(static_cast<MCOrgFragment &>(F));
}
}
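relaxOrg() follows the same contract as the other relax hooks: recompute the fragment's size and return whether it changed, so MCAssembler::layout() can keep iterating until a whole pass is stable. A standalone sketch of that fixed-point contract (not LLVM code):

#include <vector>

struct Fragment {
  unsigned Size = 0;       // size used by the current layout
  unsigned TargetSize = 0; // size the fragment actually needs (recomputed)
};

static bool relaxOne(Fragment &F) {
  if (F.Size == F.TargetSize)
    return false;          // already stable, nothing to redo
  F.Size = F.TargetSize;
  return true;             // offsets changed; another layout pass is needed
}

static unsigned layout(std::vector<Fragment> &Frags) {
  unsigned Steps = 0;
  bool Changed = true;
  while (Changed) {        // iterate to a fixed point, like MCAssembler::layout()
    Changed = false;
    ++Steps;
    for (Fragment &F : Frags)
      Changed |= relaxOne(F);
  }
  return Steps;
}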
diff --git a/llvm/lib/MC/MCCodeView.cpp b/llvm/lib/MC/MCCodeView.cpp
index 7d528a5..335934a7 100644
--- a/llvm/lib/MC/MCCodeView.cpp
+++ b/llvm/lib/MC/MCCodeView.cpp
@@ -436,12 +436,11 @@ void CodeViewContext::emitInlineLineTableForFunction(MCObjectStreamer &OS,
const MCSymbol *FnEndSym) {
// Create and insert a fragment into the current section that will be encoded
// later.
- auto *F = MCCtx->allocFragment<MCCVInlineLineTableFragment>(
+ OS.newSpecialFragment<MCCVInlineLineTableFragment>(
PrimaryFunctionId, SourceFileId, SourceLineNum, FnStartSym, FnEndSym);
- OS.insert(F);
}
-MCFragment *CodeViewContext::emitDefRange(
+void CodeViewContext::emitDefRange(
MCObjectStreamer &OS,
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
StringRef FixedSizePortion) {
@@ -451,9 +450,7 @@ MCFragment *CodeViewContext::emitDefRange(
auto &Saved = DefRangeStorage.emplace_back(Ranges.begin(), Ranges.end());
// Create and insert a fragment into the current section that will be encoded
// later.
- auto *F = MCCtx->allocFragment<MCCVDefRangeFragment>(Saved, FixedSizePortion);
- OS.insert(F);
- return F;
+ OS.newSpecialFragment<MCCVDefRangeFragment>(Saved, FixedSizePortion);
}
static unsigned computeLabelDiff(const MCAssembler &Asm, const MCSymbol *Begin,
@@ -695,5 +692,7 @@ void CodeViewContext::encodeDefRange(const MCAssembler &Asm,
}
Frag.setVarContents(Contents);
+ assert(Fixups.size() < 256 && "Store fixups outside of MCFragment's VarFixup "
+ "storage if the number ever exceeds 256");
Frag.setVarFixups(Fixups);
}
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index 39bf628..5e364e9 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -153,15 +153,12 @@ void MCContext::reset() {
SPIRVAllocator.DestroyAll();
WasmSignatureAllocator.DestroyAll();
- // ~CodeViewContext may destroy a MCFragment outside of sections and need to
- // be reset before FragmentAllocator.
CVContext.reset();
MCSubtargetAllocator.DestroyAll();
InlineAsmUsedLabelNames.clear();
Symbols.clear();
Allocator.Reset();
- FragmentAllocator.Reset();
Instances.clear();
CompilationDir.clear();
MainFileName.clear();
@@ -297,11 +294,9 @@ MCSymbol *MCContext::createSymbolImpl(const MCSymbolTableEntry *Name,
case MCContext::IsDXContainer:
break;
case MCContext::IsSPIRV:
- return new (Name, *this)
- MCSymbol(MCSymbol::SymbolKindUnset, Name, IsTemporary);
+ return new (Name, *this) MCSymbol(Name, IsTemporary);
}
- return new (Name, *this)
- MCSymbol(MCSymbol::SymbolKindUnset, Name, IsTemporary);
+ return new (Name, *this) MCSymbol(Name, IsTemporary);
}
MCSymbol *MCContext::cloneSymbol(MCSymbol &Sym) {
@@ -309,13 +304,16 @@ MCSymbol *MCContext::cloneSymbol(MCSymbol &Sym) {
auto Name = Sym.getNameEntryPtr();
switch (getObjectFileType()) {
case MCContext::IsCOFF:
- NewSym = new (Name, *this) MCSymbolCOFF(cast<MCSymbolCOFF>(Sym));
+ NewSym =
+ new (Name, *this) MCSymbolCOFF(static_cast<const MCSymbolCOFF &>(Sym));
break;
case MCContext::IsELF:
- NewSym = new (Name, *this) MCSymbolELF(cast<MCSymbolELF>(Sym));
+ NewSym =
+ new (Name, *this) MCSymbolELF(static_cast<const MCSymbolELF &>(Sym));
break;
case MCContext::IsMachO:
- NewSym = new (Name, *this) MCSymbolMachO(cast<MCSymbolMachO>(Sym));
+ NewSym = new (Name, *this)
+ MCSymbolMachO(static_cast<const MCSymbolMachO &>(Sym));
break;
default:
reportFatalUsageError(".set redefinition is not supported");
@@ -446,7 +444,7 @@ Symbol *MCContext::getOrCreateSectionSymbol(StringRef Section) {
// Use the symbol's index to track if it has been used as a section symbol.
// Set to -1 to catch potential bugs if misused as a symbol index.
if (Sym && Sym->getIndex() != -1u) {
- R = cast<Symbol>(Sym);
+ R = static_cast<Symbol *>(Sym);
} else {
SymEntry.second.Used = true;
R = new (&SymEntry, *this) Symbol(&SymEntry, /*isTemporary=*/false);
@@ -586,7 +584,7 @@ MCContext::createELFRelSection(const Twine &Name, unsigned Type, unsigned Flags,
return createELFSectionImpl(
I->getKey(), Type, Flags, EntrySize, Group, true, true,
- cast<MCSymbolELF>(RelInfoSection->getBeginSymbol()));
+ static_cast<const MCSymbolELF *>(RelInfoSection->getBeginSymbol()));
}
MCSectionELF *MCContext::getELFNamedSection(const Twine &Prefix,
@@ -604,7 +602,7 @@ MCSectionELF *MCContext::getELFSection(const Twine &Section, unsigned Type,
const MCSymbolELF *LinkedToSym) {
MCSymbolELF *GroupSym = nullptr;
if (!Group.isTriviallyEmpty() && !Group.str().empty())
- GroupSym = cast<MCSymbolELF>(getOrCreateSymbol(Group));
+ GroupSym = static_cast<MCSymbolELF *>(getOrCreateSymbol(Group));
return getELFSection(Section, Type, Flags, EntrySize, GroupSym, IsComdat,
UniqueID, LinkedToSym);
@@ -817,7 +815,7 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind K,
unsigned UniqueID) {
MCSymbolWasm *GroupSym = nullptr;
if (!Group.isTriviallyEmpty() && !Group.str().empty()) {
- GroupSym = cast<MCSymbolWasm>(getOrCreateSymbol(Group));
+ GroupSym = static_cast<MCSymbolWasm *>(getOrCreateSymbol(Group));
GroupSym->setComdat(true);
if (K.isMetadata() && !GroupSym->getType().has_value()) {
// Comdat group symbol associated with a custom section is a section
@@ -848,7 +846,7 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind Kind,
MCSymbol *Begin = createRenamableSymbol(CachedName, true, false);
// Begin always has a different name than CachedName... see #48596.
getSymbolTableEntry(Begin->getName()).second.Symbol = Begin;
- cast<MCSymbolWasm>(Begin)->setType(wasm::WASM_SYMBOL_TYPE_SECTION);
+ static_cast<MCSymbolWasm *>(Begin)->setType(wasm::WASM_SYMBOL_TYPE_SECTION);
MCSectionWasm *Result = new (WasmAllocator.Allocate())
MCSectionWasm(CachedName, Kind, Flags, GroupSym, UniqueID, Begin);
@@ -889,9 +887,9 @@ MCSectionXCOFF *MCContext::getXCOFFSection(
MCSymbolXCOFF *QualName = nullptr;
// Debug sections don't have a storage class attribute.
if (IsDwarfSec)
- QualName = cast<MCSymbolXCOFF>(getOrCreateSymbol(CachedName));
+ QualName = static_cast<MCSymbolXCOFF *>(getOrCreateSymbol(CachedName));
else
- QualName = cast<MCSymbolXCOFF>(getOrCreateSymbol(
+ QualName = static_cast<MCSymbolXCOFF *>(getOrCreateSymbol(
CachedName + "[" +
XCOFF::getMappingClassString(CsectProp->MappingClass) + "]"));
diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp
index 38744a0..275e76e 100644
--- a/llvm/lib/MC/MCELFStreamer.cpp
+++ b/llvm/lib/MC/MCELFStreamer.cpp
@@ -59,7 +59,7 @@ void MCELFStreamer::initSections(bool NoExecStack, const MCSubtargetInfo &STI) {
}
void MCELFStreamer::emitLabel(MCSymbol *S, SMLoc Loc) {
- auto *Symbol = cast<MCSymbolELF>(S);
+ auto *Symbol = static_cast<MCSymbolELF *>(S);
MCObjectStreamer::emitLabel(Symbol, Loc);
const MCSectionELF &Section =
@@ -70,7 +70,7 @@ void MCELFStreamer::emitLabel(MCSymbol *S, SMLoc Loc) {
void MCELFStreamer::emitLabelAtPos(MCSymbol *S, SMLoc Loc, MCFragment &F,
uint64_t Offset) {
- auto *Symbol = cast<MCSymbolELF>(S);
+ auto *Symbol = static_cast<MCSymbolELF *>(S);
MCObjectStreamer::emitLabelAtPos(Symbol, Loc, F, Offset);
const MCSectionELF &Section =
@@ -95,7 +95,7 @@ void MCELFStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
}
void MCELFStreamer::emitWeakReference(MCSymbol *Alias, const MCSymbol *Target) {
- auto *A = cast<MCSymbolELF>(Alias);
+ auto *A = static_cast<MCSymbolELF *>(Alias);
if (A->isDefined()) {
getContext().reportError(getStartTokLoc(), "symbol '" + A->getName() +
"' is already defined");
@@ -126,7 +126,7 @@ static unsigned CombineSymbolTypes(unsigned T1, unsigned T2) {
}
bool MCELFStreamer::emitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
- auto *Symbol = cast<MCSymbolELF>(S);
+ auto *Symbol = static_cast<MCSymbolELF *>(S);
// Adding a symbol attribute always introduces the symbol; note that an
// important side effect of calling registerSymbol here is to register
@@ -247,7 +247,7 @@ bool MCELFStreamer::emitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
void MCELFStreamer::emitCommonSymbol(MCSymbol *S, uint64_t Size,
Align ByteAlignment) {
- auto *Symbol = cast<MCSymbolELF>(S);
+ auto *Symbol = static_cast<MCSymbolELF *>(S);
getAssembler().registerSymbol(*Symbol);
if (!Symbol->isBindingSet())
@@ -272,12 +272,12 @@ void MCELFStreamer::emitCommonSymbol(MCSymbol *S, uint64_t Size,
" redeclared as different type");
}
- cast<MCSymbolELF>(Symbol)
- ->setSize(MCConstantExpr::create(Size, getContext()));
+ static_cast<MCSymbolELF *>(Symbol)->setSize(
+ MCConstantExpr::create(Size, getContext()));
}
void MCELFStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
- cast<MCSymbolELF>(Symbol)->setSize(Value);
+ static_cast<MCSymbolELF *>(Symbol)->setSize(Value);
}
void MCELFStreamer::emitELFSymverDirective(const MCSymbol *OriginalSym,
@@ -289,7 +289,7 @@ void MCELFStreamer::emitELFSymverDirective(const MCSymbol *OriginalSym,
void MCELFStreamer::emitLocalCommonSymbol(MCSymbol *S, uint64_t Size,
Align ByteAlignment) {
- auto *Symbol = cast<MCSymbolELF>(S);
+ auto *Symbol = static_cast<MCSymbolELF *>(S);
// FIXME: Should this be caught and done earlier?
getAssembler().registerSymbol(*Symbol);
Symbol->setBinding(ELF::STB_LOCAL);
diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp
index a214513..6226b02 100644
--- a/llvm/lib/MC/MCMachOStreamer.cpp
+++ b/llvm/lib/MC/MCMachOStreamer.cpp
@@ -147,7 +147,7 @@ void MCMachOStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
void MCMachOStreamer::emitEHSymAttributes(const MCSymbol *Symbol,
MCSymbol *EHSymbol) {
- auto *Sym = cast<MCSymbolMachO>(Symbol);
+ auto *Sym = static_cast<const MCSymbolMachO *>(Symbol);
getAssembler().registerSymbol(*Symbol);
if (Symbol->isExternal())
emitSymbolAttribute(EHSymbol, MCSA_Global);
@@ -160,7 +160,7 @@ void MCMachOStreamer::emitEHSymAttributes(const MCSymbol *Symbol,
void MCMachOStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
// We have to create a new fragment if this is an atom defining symbol,
// fragments cannot span atoms.
- if (cast<MCSymbolMachO>(Symbol)->isSymbolLinkerVisible())
+ if (static_cast<MCSymbolMachO *>(Symbol)->isSymbolLinkerVisible())
newFragment();
MCObjectStreamer::emitLabel(Symbol, Loc);
@@ -172,7 +172,7 @@ void MCMachOStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
//
// FIXME: Cleanup this code, these bits should be emitted based on semantic
// properties, not on the order of definition, etc.
- cast<MCSymbolMachO>(Symbol)->clearReferenceType();
+ static_cast<MCSymbolMachO *>(Symbol)->clearReferenceType();
}
void MCMachOStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
@@ -182,7 +182,7 @@ void MCMachOStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
if (const auto *SymA = Res.getAddSym()) {
if (!Res.getSubSym() &&
(SymA->getName().empty() || Res.getConstant() != 0))
- cast<MCSymbolMachO>(Symbol)->setAltEntry();
+ static_cast<MCSymbolMachO *>(Symbol)->setAltEntry();
}
}
MCObjectStreamer::emitAssignment(Symbol, Value);
@@ -256,7 +256,7 @@ void MCMachOStreamer::emitDarwinTargetVariantBuildVersion(
bool MCMachOStreamer::emitSymbolAttribute(MCSymbol *Sym,
MCSymbolAttr Attribute) {
- MCSymbolMachO *Symbol = cast<MCSymbolMachO>(Sym);
+ auto *Symbol = static_cast<MCSymbolMachO *>(Sym);
// Indirect symbols are handled differently, to match how 'as' handles
// them. This makes writing matching .o files easier.
@@ -367,7 +367,7 @@ bool MCMachOStreamer::emitSymbolAttribute(MCSymbol *Sym,
void MCMachOStreamer::emitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
// Encode the 'desc' value into the lowest implementation defined bits.
getAssembler().registerSymbol(*Symbol);
- cast<MCSymbolMachO>(Symbol)->setDesc(DescValue);
+ static_cast<MCSymbolMachO *>(Symbol)->setDesc(DescValue);
}
void MCMachOStreamer::emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
@@ -430,7 +430,7 @@ void MCMachOStreamer::finishImpl() {
// defining symbols.
DenseMap<const MCFragment *, const MCSymbol *> DefiningSymbolMap;
for (const MCSymbol &Symbol : getAssembler().symbols()) {
- auto &Sym = cast<MCSymbolMachO>(Symbol);
+ auto &Sym = static_cast<const MCSymbolMachO &>(Symbol);
if (Sym.isSymbolLinkerVisible() && Sym.isInSection() && !Sym.isVariable() &&
!Sym.isAltEntry()) {
// An atom defining symbol should never be internal to a fragment.
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index 393eed1..4ac73ab 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -1135,9 +1135,10 @@ MCObjectFileInfo::getCallGraphSection(const MCSection &TextSec) const {
Flags |= ELF::SHF_GROUP;
}
- return Ctx->getELFSection(".callgraph", ELF::SHT_PROGBITS, Flags, 0,
- GroupName, true, ElfSec.getUniqueID(),
- cast<MCSymbolELF>(TextSec.getBeginSymbol()));
+ return Ctx->getELFSection(
+ ".callgraph", ELF::SHT_PROGBITS, Flags, 0, GroupName, true,
+ ElfSec.getUniqueID(),
+ static_cast<const MCSymbolELF *>(TextSec.getBeginSymbol()));
}
MCSection *
@@ -1154,9 +1155,10 @@ MCObjectFileInfo::getStackSizesSection(const MCSection &TextSec) const {
Flags |= ELF::SHF_GROUP;
}
- return Ctx->getELFSection(".stack_sizes", ELF::SHT_PROGBITS, Flags, 0,
- GroupName, true, ElfSec.getUniqueID(),
- cast<MCSymbolELF>(TextSec.getBeginSymbol()));
+ return Ctx->getELFSection(
+ ".stack_sizes", ELF::SHT_PROGBITS, Flags, 0, GroupName, true,
+ ElfSec.getUniqueID(),
+ static_cast<const MCSymbolELF *>(TextSec.getBeginSymbol()));
}
MCSection *
@@ -1174,9 +1176,10 @@ MCObjectFileInfo::getBBAddrMapSection(const MCSection &TextSec) const {
// Use the text section's begin symbol and unique ID to create a separate
// .llvm_bb_addr_map section associated with every unique text section.
- return Ctx->getELFSection(".llvm_bb_addr_map", ELF::SHT_LLVM_BB_ADDR_MAP,
- Flags, 0, GroupName, true, ElfSec.getUniqueID(),
- cast<MCSymbolELF>(TextSec.getBeginSymbol()));
+ return Ctx->getELFSection(
+ ".llvm_bb_addr_map", ELF::SHT_LLVM_BB_ADDR_MAP, Flags, 0, GroupName, true,
+ ElfSec.getUniqueID(),
+ static_cast<const MCSymbolELF *>(TextSec.getBeginSymbol()));
}
MCSection *
@@ -1192,10 +1195,10 @@ MCObjectFileInfo::getKCFITrapSection(const MCSection &TextSec) const {
Flags |= ELF::SHF_GROUP;
}
- return Ctx->getELFSection(".kcfi_traps", ELF::SHT_PROGBITS, Flags, 0,
- GroupName,
- /*IsComdat=*/true, ElfSec.getUniqueID(),
- cast<MCSymbolELF>(TextSec.getBeginSymbol()));
+ return Ctx->getELFSection(
+ ".kcfi_traps", ELF::SHT_PROGBITS, Flags, 0, GroupName,
+ /*IsComdat=*/true, ElfSec.getUniqueID(),
+ static_cast<const MCSymbolELF *>(TextSec.getBeginSymbol()));
}
MCSection *
@@ -1211,9 +1214,10 @@ MCObjectFileInfo::getPseudoProbeSection(const MCSection &TextSec) const {
Flags |= ELF::SHF_GROUP;
}
- return Ctx->getELFSection(PseudoProbeSection->getName(), ELF::SHT_PROGBITS,
- Flags, 0, GroupName, true, ElfSec.getUniqueID(),
- cast<MCSymbolELF>(TextSec.getBeginSymbol()));
+ return Ctx->getELFSection(
+ PseudoProbeSection->getName(), ELF::SHT_PROGBITS, Flags, 0, GroupName,
+ true, ElfSec.getUniqueID(),
+ static_cast<const MCSymbolELF *>(TextSec.getBeginSymbol()));
}
MCSection *
@@ -1261,7 +1265,7 @@ MCSection *MCObjectFileInfo::getPCSection(StringRef Name,
GroupName = Group->getName();
Flags |= ELF::SHF_GROUP;
}
- return Ctx->getELFSection(Name, ELF::SHT_PROGBITS, Flags, 0, GroupName, true,
- ElfSec.getUniqueID(),
- cast<MCSymbolELF>(TextSec->getBeginSymbol()));
+ return Ctx->getELFSection(
+ Name, ELF::SHT_PROGBITS, Flags, 0, GroupName, true, ElfSec.getUniqueID(),
+ static_cast<const MCSymbolELF *>(TextSec->getBeginSymbol()));
}
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index e277143..8c27958 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -46,27 +46,83 @@ MCAssembler *MCObjectStreamer::getAssemblerPtr() {
return nullptr;
}
+constexpr size_t FragBlockSize = 16384;
+// Ensure the new fragment can at least store a few bytes.
+constexpr size_t NewFragHeadroom = 8;
+
+static_assert(NewFragHeadroom >= alignof(MCFragment));
+static_assert(FragBlockSize >= sizeof(MCFragment) + NewFragHeadroom);
+
+MCFragment *MCObjectStreamer::allocFragSpace(size_t Headroom) {
+ auto Size = std::max(FragBlockSize, sizeof(MCFragment) + Headroom);
+ FragSpace = Size - sizeof(MCFragment);
+ auto Block = std::unique_ptr<uint8_t[]>(new uint8_t[Size]);
+ auto *F = reinterpret_cast<MCFragment *>(Block.get());
+ FragStorage.push_back(std::move(Block));
+ return F;
+}
+
void MCObjectStreamer::newFragment() {
- addFragment(getContext().allocFragment<MCFragment>());
+ MCFragment *F;
+ if (LLVM_LIKELY(sizeof(MCFragment) + NewFragHeadroom <= FragSpace)) {
+ auto End = reinterpret_cast<size_t>(getCurFragEnd());
+ F = reinterpret_cast<MCFragment *>(
+ alignToPowerOf2(End, alignof(MCFragment)));
+ FragSpace -= size_t(F) - End + sizeof(MCFragment);
+ } else {
+ F = allocFragSpace(0);
+ }
+ new (F) MCFragment();
+ addFragment(F);
}
-void MCObjectStreamer::insert(MCFragment *F) {
- assert(F->getKind() != MCFragment::FT_Data &&
- "F should have a variable-size tail");
+void MCObjectStreamer::ensureHeadroom(size_t Headroom) {
+ if (Headroom <= FragSpace)
+ return;
+ auto *F = allocFragSpace(Headroom);
+ new (F) MCFragment();
+ addFragment(F);
+}
+
+void MCObjectStreamer::addSpecialFragment(MCFragment *Frag) {
+ assert(Frag->getKind() != MCFragment::FT_Data &&
+ "Frag should have a variable-size tail");
+ // Frag is not connected to FragSpace. Before modifying CurFrag with
+ // addFragment(Frag), allocate an empty fragment to maintain FragSpace
+ // connectivity, potentially reusing CurFrag's associated space.
+ MCFragment *F;
+ if (LLVM_LIKELY(sizeof(MCFragment) + NewFragHeadroom <= FragSpace)) {
+ auto End = reinterpret_cast<size_t>(getCurFragEnd());
+ F = reinterpret_cast<MCFragment *>(
+ alignToPowerOf2(End, alignof(MCFragment)));
+ FragSpace -= size_t(F) - End + sizeof(MCFragment);
+ } else {
+ F = allocFragSpace(0);
+ }
+ new (F) MCFragment();
+
+ addFragment(Frag);
addFragment(F);
- newFragment();
}
void MCObjectStreamer::appendContents(ArrayRef<char> Contents) {
- CurFrag->appendContents(Contents);
+ ensureHeadroom(Contents.size());
+ assert(FragSpace >= Contents.size());
+ llvm::copy(Contents, getCurFragEnd());
+ CurFrag->FixedSize += Contents.size();
+ FragSpace -= Contents.size();
}
-void MCObjectStreamer::appendContents(size_t Num, char Elt) {
- CurFrag->appendContents(Num, Elt);
+void MCObjectStreamer::appendContents(size_t Num, uint8_t Elt) {
+ ensureHeadroom(Num);
+ MutableArrayRef<uint8_t> Data(getCurFragEnd(), Num);
+ llvm::fill(Data, Elt);
+ CurFrag->FixedSize += Num;
+ FragSpace -= Num;
}
void MCObjectStreamer::addFixup(const MCExpr *Value, MCFixupKind Kind) {
- CurFrag->addFixup(MCFixup::create(CurFrag->getFixedSize(), Value, Kind));
+ CurFrag->addFixup(MCFixup::create(getCurFragSize(), Value, Kind));
}
// As a compile-time optimization, avoid allocating and evaluating an MCExpr
@@ -115,6 +171,9 @@ void MCObjectStreamer::reset() {
}
EmitEHFrame = true;
EmitDebugFrame = false;
+ FragStorage.clear();
+ FragSpace = 0;
+ SpecialFragAllocator.Reset();
MCStreamer::reset();
}
@@ -143,7 +202,6 @@ void MCObjectStreamer::emitCFISections(bool EH, bool Debug, bool SFrame) {
void MCObjectStreamer::emitValueImpl(const MCExpr *Value, unsigned Size,
SMLoc Loc) {
MCStreamer::emitValueImpl(Value, Size, Loc);
- MCFragment *DF = getCurrentFragment();
MCDwarfLineEntry::make(this, getCurrentSectionOnly());
@@ -158,9 +216,9 @@ void MCObjectStreamer::emitValueImpl(const MCExpr *Value, unsigned Size,
emitIntValue(AbsValue, Size);
return;
}
- DF->addFixup(MCFixup::create(DF->getContents().size(), Value,
- MCFixup::getDataKindForSize(Size)));
- DF->appendContents(Size, 0);
+ ensureHeadroom(Size);
+ addFixup(Value, MCFixup::getDataKindForSize(Size));
+ appendContents(Size, 0);
}
MCSymbol *MCObjectStreamer::emitCFILabel() {
@@ -194,7 +252,7 @@ void MCObjectStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
// section.
MCFragment *F = CurFrag;
Symbol->setFragment(F);
- Symbol->setOffset(F->getContents().size());
+ Symbol->setOffset(F->getFixedSize());
emitPendingAssignments(Symbol);
}
@@ -260,6 +318,21 @@ void MCObjectStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
F0 = CurFrag;
}
+ // To maintain connectivity between CurFrag and FragSpace when CurFrag is
+ // modified, allocate an empty fragment and append it to the fragment list.
+ // (Subsections[I].second.Tail is not connected to FragSpace.)
+ MCFragment *F;
+ if (LLVM_LIKELY(sizeof(MCFragment) + NewFragHeadroom <= FragSpace)) {
+ auto End = reinterpret_cast<size_t>(getCurFragEnd());
+ F = reinterpret_cast<MCFragment *>(
+ alignToPowerOf2(End, alignof(MCFragment)));
+ FragSpace -= size_t(F) - End + sizeof(MCFragment);
+ } else {
+ F = allocFragSpace(0);
+ }
+ new (F) MCFragment();
+ F->setParent(Section);
+
auto &Subsections = Section->Subsections;
size_t I = 0, E = Subsections.size();
while (I != E && Subsections[I].first < Subsection)
@@ -267,13 +340,16 @@ void MCObjectStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
// If the subsection number is not in the sorted Subsections list, create a
// new fragment list.
if (I == E || Subsections[I].first != Subsection) {
- auto *F = getContext().allocFragment<MCFragment>();
- F->setParent(Section);
Subsections.insert(Subsections.begin() + I,
{Subsection, MCSection::FragList{F, F}});
+ Section->CurFragList = &Subsections[I].second;
+ CurFrag = F;
+ } else {
+ Section->CurFragList = &Subsections[I].second;
+ CurFrag = Subsections[I].second.Tail;
+ // Ensure CurFrag is associated with FragSpace.
+ addFragment(F);
}
- Section->CurFragList = &Subsections[I].second;
- CurFrag = Section->CurFragList->Tail;
// Define the section symbol at subsection 0's initial fragment if required.
if (!NewSec)
@@ -344,11 +420,15 @@ void MCObjectStreamer::emitInstToData(const MCInst &Inst,
MCFragment *F = getCurrentFragment();
// Append the instruction to the data fragment.
- size_t CodeOffset = F->getContents().size();
+ size_t CodeOffset = getCurFragSize();
+ SmallString<16> Content;
SmallVector<MCFixup, 1> Fixups;
- getAssembler().getEmitter().encodeInstruction(
- Inst, F->getContentsForAppending(), Fixups, STI);
- F->doneAppending();
+ getAssembler().getEmitter().encodeInstruction(Inst, Content, Fixups, STI);
+ appendContents(Content);
+ if (CurFrag != F) {
+ F = CurFrag;
+ CodeOffset = 0;
+ }
F->setHasInstructions(STI);
if (Fixups.empty())
@@ -570,7 +650,7 @@ void MCObjectStreamer::emitCodeAlignment(Align Alignment,
void MCObjectStreamer::emitValueToOffset(const MCExpr *Offset,
unsigned char Value,
SMLoc Loc) {
- insert(getContext().allocFragment<MCOrgFragment>(*Offset, Value, Loc));
+ newSpecialFragment<MCOrgFragment>(*Offset, Value, Loc);
}
void MCObjectStreamer::emitRelocDirective(const MCExpr &Offset, StringRef Name,
@@ -602,8 +682,7 @@ void MCObjectStreamer::emitRelocDirective(const MCExpr &Offset, StringRef Name,
void MCObjectStreamer::emitFill(const MCExpr &NumBytes, uint64_t FillValue,
SMLoc Loc) {
assert(getCurrentSectionOnly() && "need a section");
- insert(
- getContext().allocFragment<MCFillFragment>(FillValue, 1, NumBytes, Loc));
+ newSpecialFragment<MCFillFragment>(FillValue, 1, NumBytes, Loc);
}
void MCObjectStreamer::emitFill(const MCExpr &NumValues, int64_t Size,
@@ -630,15 +709,13 @@ void MCObjectStreamer::emitFill(const MCExpr &NumValues, int64_t Size,
// Otherwise emit as fragment.
assert(getCurrentSectionOnly() && "need a section");
- insert(
- getContext().allocFragment<MCFillFragment>(Expr, Size, NumValues, Loc));
+ newSpecialFragment<MCFillFragment>(Expr, Size, NumValues, Loc);
}
void MCObjectStreamer::emitNops(int64_t NumBytes, int64_t ControlledNopLength,
SMLoc Loc, const MCSubtargetInfo &STI) {
assert(getCurrentSectionOnly() && "need a section");
- insert(getContext().allocFragment<MCNopsFragment>(
- NumBytes, ControlledNopLength, Loc, STI));
+ newSpecialFragment<MCNopsFragment>(NumBytes, ControlledNopLength, Loc, STI);
}
void MCObjectStreamer::emitFileDirective(StringRef Filename) {
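The new newFragment()/ensureHeadroom()/appendContents() paths above replace context-owned per-fragment allocation with streamer-owned 16 KiB blocks: fragment headers and their fixed-size contents are bump-allocated together, and FragSpace tracks how many bytes remain behind CurFrag. A standalone sketch of that scheme (not LLVM code; linking fragments into section lists is omitted):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <new>
#include <vector>

namespace sketch {

struct Frag {
  size_t FixedSize = 0; // bytes of content stored right after this header
};

constexpr size_t BlockSize = 16384;
constexpr size_t Headroom = 8;

class Streamer {
  std::vector<std::unique_ptr<uint8_t[]>> Storage;
  Frag *Cur = nullptr;
  size_t Space = 0; // free bytes after Cur's content in the current block

  uint8_t *curEnd() const {
    return reinterpret_cast<uint8_t *>(Cur + 1) + Cur->FixedSize;
  }

  Frag *allocBlock(size_t Need) {
    size_t Size = std::max(BlockSize, sizeof(Frag) + Need);
    Storage.push_back(std::make_unique<uint8_t[]>(Size));
    Space = Size - sizeof(Frag);
    return new (Storage.back().get()) Frag();
  }

public:
  Streamer() { Cur = allocBlock(0); }

  // Mirrors newFragment(): reuse the tail of the current block if a header
  // plus a little headroom still fits, otherwise start a new block.
  void newFragment() {
    if (sizeof(Frag) + Headroom <= Space) {
      auto End = reinterpret_cast<uintptr_t>(curEnd());
      auto Aligned =
          (End + alignof(Frag) - 1) & ~(uintptr_t(alignof(Frag)) - 1);
      Space -= (Aligned - End) + sizeof(Frag);
      Cur = new (reinterpret_cast<void *>(Aligned)) Frag();
    } else {
      Cur = allocBlock(0);
    }
  }

  // Mirrors ensureHeadroom() + appendContents(): grow into the free tail,
  // or start a new fragment in a block big enough for the whole append.
  void append(const uint8_t *Data, size_t N) {
    if (N > Space)
      Cur = allocBlock(N);
    std::copy(Data, Data + N, curEnd());
    Cur->FixedSize += N;
    Space -= N;
  }
};

} // namespace sketch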
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index 9f64a98..7782dc1 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -1865,7 +1865,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
}
if (MAI.hasSubsectionsViaSymbols() && CFIStartProcLoc &&
- Sym->isExternal() && !cast<MCSymbolMachO>(Sym)->isAltEntry())
+ Sym->isExternal() && !static_cast<MCSymbolMachO *>(Sym)->isAltEntry())
return Error(StartTokLoc, "non-private labels cannot appear between "
".cfi_startproc / .cfi_endproc pairs") &&
Error(*CFIStartProcLoc, "previous .cfi_startproc was here");
@@ -6273,7 +6273,8 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef,
// used as a symbol, or it is an absolute symbol).
Sym = Parser.getContext().lookupSymbol(Name);
if (Sym) {
- if (!Sym->isUnset() && (!allow_redef || !Sym->isRedefinable()))
+ if ((Sym->isVariable() || Sym->isDefined()) &&
+ (!allow_redef || !Sym->isRedefinable()))
return Parser.Error(EqualLoc, "redefinition of '" + Name + "'");
// If the symbol is redefinable, clone it and update the symbol table
// to the new symbol. Existing references to the original symbol remain
diff --git a/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
index 282f22f..229b0b8 100644
--- a/llvm/lib/MC/MCParser/COFFMasmParser.cpp
+++ b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
@@ -460,7 +460,8 @@ bool COFFMasmParser::parseDirectiveProc(StringRef Directive, SMLoc Loc) {
nextLoc = getTok().getLoc();
}
}
- MCSymbolCOFF *Sym = cast<MCSymbolCOFF>(getContext().getOrCreateSymbol(Label));
+ auto *Sym =
+ static_cast<MCSymbolCOFF *>(getContext().getOrCreateSymbol(Label));
// Define symbol as simple external function
Sym->setExternal(true);
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 2e251cc..6782c4b 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -200,7 +200,7 @@ bool ELFAsmParser::parseDirectiveSize(StringRef, SMLoc) {
StringRef Name;
if (getParser().parseIdentifier(Name))
return TokError("expected identifier");
- MCSymbolELF *Sym = cast<MCSymbolELF>(getContext().getOrCreateSymbol(Name));
+ auto *Sym = static_cast<MCSymbolELF *>(getContext().getOrCreateSymbol(Name));
if (getLexer().isNot(AsmToken::Comma))
return TokError("expected comma");
@@ -466,7 +466,7 @@ bool ELFAsmParser::parseLinkedToSym(MCSymbolELF *&LinkedToSym) {
}
return TokError("invalid linked-to symbol");
}
- LinkedToSym = dyn_cast_or_null<MCSymbolELF>(getContext().lookupSymbol(Name));
+ LinkedToSym = static_cast<MCSymbolELF *>(getContext().lookupSymbol(Name));
if (!LinkedToSym || !LinkedToSym->isInSection())
return Error(StartLoc, "linked-to symbol is not in a section: " + Name);
return false;
diff --git a/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
index d97f4f5..6c2d241 100644
--- a/llvm/lib/MC/MCParser/WasmAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
@@ -224,7 +224,7 @@ public:
return true;
if (expect(AsmToken::EndOfStatement, "eol"))
return true;
- auto WasmSym = cast<MCSymbolWasm>(Sym);
+ auto WasmSym = static_cast<const MCSymbolWasm *>(Sym);
if (WasmSym->isFunction()) {
// Ignore .size directives for function symbols. They get their size
// set automatically based on their content.
@@ -241,9 +241,9 @@ public:
if (!Lexer->is(AsmToken::Identifier))
return error("Expected label after .type directive, got: ",
Lexer->getTok());
- auto WasmSym = cast<MCSymbolWasm>(
- getStreamer().getContext().getOrCreateSymbol(
- Lexer->getTok().getString()));
+ auto *WasmSym = static_cast<MCSymbolWasm *>(
+ getStreamer().getContext().getOrCreateSymbol(
+ Lexer->getTok().getString()));
Lex();
if (!(isNext(AsmToken::Comma) && isNext(AsmToken::At) &&
Lexer->is(AsmToken::Identifier)))
diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp
index 4f28267..27ca131 100644
--- a/llvm/lib/MC/MCSection.cpp
+++ b/llvm/lib/MC/MCSection.cpp
@@ -83,12 +83,14 @@ void MCFragment::appendFixups(ArrayRef<MCFixup> Fixups) {
}
void MCFragment::setVarFixups(ArrayRef<MCFixup> Fixups) {
+ assert(Fixups.size() < 256 &&
+ "variable-size tail cannot have more than 256 fixups");
auto &S = getParent()->FixupStorage;
- if (VarFixupStart + Fixups.size() > VarFixupEnd) {
+ if (Fixups.size() > VarFixupSize) {
VarFixupStart = S.size();
S.resize_for_overwrite(S.size() + Fixups.size());
}
- VarFixupEnd = VarFixupStart + Fixups.size();
+ VarFixupSize = Fixups.size();
// Source fixup offsets are relative to the variable part's start. Add the
// fixed part size to make them relative to the fixed part's start.
std::transform(Fixups.begin(), Fixups.end(), S.begin() + VarFixupStart,
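A standalone sketch of the size-based bookkeeping introduced above, using hypothetical types rather than the real MCFragment/MCSection API: each fragment owns a slice of a shared per-section fixup buffer, claims fresh space at the end of the buffer only when the incoming fixup count exceeds its current slice, and rebases fixup offsets onto the fixed part much as the std::transform above does.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

struct Fixup { uint32_t Offset = 0; };

struct SectionStorage { std::vector<Fixup> FixupStorage; };

struct Fragment {
  SectionStorage *Parent = nullptr;
  uint32_t FixedSize = 0;     // size of the fixed part preceding the tail
  uint32_t VarFixupStart = 0; // index of this fragment's slice in the buffer
  uint8_t VarFixupSize = 0;   // number of fixups in the slice (always < 256)

  void setVarFixups(const std::vector<Fixup> &Fixups) {
    assert(Fixups.size() < 256 && "tail must have fewer than 256 fixups");
    auto &S = Parent->FixupStorage;
    if (Fixups.size() > VarFixupSize) {
      // The existing slice is too small; append fresh storage rather than
      // trying to grow it in place.
      VarFixupStart = static_cast<uint32_t>(S.size());
      S.resize(S.size() + Fixups.size());
    }
    VarFixupSize = static_cast<uint8_t>(Fixups.size());
    // Source offsets are relative to the variable tail; rebase them onto the
    // start of the fixed part.
    std::transform(Fixups.begin(), Fixups.end(), S.begin() + VarFixupStart,
                   [&](Fixup F) {
                     F.Offset += FixedSize;
                     return F;
                   });
  }
};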
diff --git a/llvm/lib/MC/MCWasmStreamer.cpp b/llvm/lib/MC/MCWasmStreamer.cpp
index e3ef111..9c8b224 100644
--- a/llvm/lib/MC/MCWasmStreamer.cpp
+++ b/llvm/lib/MC/MCWasmStreamer.cpp
@@ -36,7 +36,7 @@ using namespace llvm;
MCWasmStreamer::~MCWasmStreamer() = default; // anchor.
void MCWasmStreamer::emitLabel(MCSymbol *S, SMLoc Loc) {
- auto *Symbol = cast<MCSymbolWasm>(S);
+ auto *Symbol = static_cast<MCSymbolWasm *>(S);
MCObjectStreamer::emitLabel(Symbol, Loc);
const MCSectionWasm &Section =
@@ -47,7 +47,7 @@ void MCWasmStreamer::emitLabel(MCSymbol *S, SMLoc Loc) {
void MCWasmStreamer::emitLabelAtPos(MCSymbol *S, SMLoc Loc, MCFragment &F,
uint64_t Offset) {
- auto *Symbol = cast<MCSymbolWasm>(S);
+ auto *Symbol = static_cast<MCSymbolWasm *>(S);
MCObjectStreamer::emitLabelAtPos(Symbol, Loc, F, Offset);
const MCSectionWasm &Section =
@@ -69,8 +69,7 @@ void MCWasmStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
bool MCWasmStreamer::emitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
assert(Attribute != MCSA_IndirectSymbol && "indirect symbols not supported");
-
- auto *Symbol = cast<MCSymbolWasm>(S);
+ auto *Symbol = static_cast<MCSymbolWasm *>(S);
// Adding a symbol attribute always introduces the symbol; note that an
// important side effect of calling registerSymbol here is to register the
@@ -135,7 +134,7 @@ void MCWasmStreamer::emitCommonSymbol(MCSymbol *S, uint64_t Size,
}
void MCWasmStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
- cast<MCSymbolWasm>(Symbol)->setSize(Value);
+ static_cast<MCSymbolWasm *>(Symbol)->setSize(Value);
}
void MCWasmStreamer::emitLocalCommonSymbol(MCSymbol *S, uint64_t Size,
diff --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp
index 72a8dd7..a87648a 100644
--- a/llvm/lib/MC/MCWin64EH.cpp
+++ b/llvm/lib/MC/MCWin64EH.cpp
@@ -318,6 +318,9 @@ static void EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
// Emit the epilog instructions.
if (EnableUnwindV2) {
+ // Ensure the fixups and appended content apply to the same fragment.
+ OS->ensureHeadroom(info->EpilogMap.size() * 2);
+
bool IsLast = true;
for (const auto &Epilog : llvm::reverse(info->EpilogMap)) {
if (IsLast) {
diff --git a/llvm/lib/MC/MCWinCOFFStreamer.cpp b/llvm/lib/MC/MCWinCOFFStreamer.cpp
index 1ffe25c..a45936b 100644
--- a/llvm/lib/MC/MCWinCOFFStreamer.cpp
+++ b/llvm/lib/MC/MCWinCOFFStreamer.cpp
@@ -163,13 +163,13 @@ void MCWinCOFFStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
}
void MCWinCOFFStreamer::emitLabel(MCSymbol *S, SMLoc Loc) {
- auto *Symbol = cast<MCSymbolCOFF>(S);
+ auto *Symbol = static_cast<MCSymbolCOFF *>(S);
MCObjectStreamer::emitLabel(Symbol, Loc);
}
bool MCWinCOFFStreamer::emitSymbolAttribute(MCSymbol *S,
MCSymbolAttr Attribute) {
- auto *Symbol = cast<MCSymbolCOFF>(S);
+ auto *Symbol = static_cast<MCSymbolCOFF *>(S);
getAssembler().registerSymbol(*Symbol);
switch (Attribute) {
@@ -199,11 +199,10 @@ void MCWinCOFFStreamer::emitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
}
void MCWinCOFFStreamer::beginCOFFSymbolDef(MCSymbol const *S) {
- auto *Symbol = cast<MCSymbolCOFF>(S);
if (CurSymbol)
Error("starting a new symbol definition without completing the "
"previous one");
- CurSymbol = Symbol;
+ CurSymbol = static_cast<MCSymbolCOFF *>(const_cast<MCSymbol *>(S));
}
void MCWinCOFFStreamer::emitCOFFSymbolStorageClass(int StorageClass) {
@@ -219,7 +218,7 @@ void MCWinCOFFStreamer::emitCOFFSymbolStorageClass(int StorageClass) {
}
getAssembler().registerSymbol(*CurSymbol);
- cast<MCSymbolCOFF>(CurSymbol)->setClass((uint16_t)StorageClass);
+ static_cast<MCSymbolCOFF *>(CurSymbol)->setClass((uint16_t)StorageClass);
}
void MCWinCOFFStreamer::emitCOFFSymbolType(int Type) {
@@ -234,7 +233,7 @@ void MCWinCOFFStreamer::emitCOFFSymbolType(int Type) {
}
getAssembler().registerSymbol(*CurSymbol);
- cast<MCSymbolCOFF>(CurSymbol)->setType((uint16_t)Type);
+ static_cast<const MCSymbolCOFF *>(CurSymbol)->setType((uint16_t)Type);
}
void MCWinCOFFStreamer::endCOFFSymbolDef() {
@@ -249,7 +248,7 @@ void MCWinCOFFStreamer::emitCOFFSafeSEH(MCSymbol const *Symbol) {
if (getContext().getTargetTriple().getArch() != Triple::x86)
return;
- const MCSymbolCOFF *CSymbol = cast<MCSymbolCOFF>(Symbol);
+ auto *CSymbol = static_cast<const MCSymbolCOFF *>(Symbol);
if (CSymbol->isSafeSEH())
return;
@@ -258,7 +257,7 @@ void MCWinCOFFStreamer::emitCOFFSafeSEH(MCSymbol const *Symbol) {
switchSection(SXData);
SXData->ensureMinAlignment(Align(4));
- insert(getContext().allocFragment<MCSymbolIdFragment>(Symbol));
+ newSpecialFragment<MCSymbolIdFragment>(Symbol);
getAssembler().registerSymbol(*Symbol);
CSymbol->setIsSafeSEH();
@@ -273,13 +272,14 @@ void MCWinCOFFStreamer::emitCOFFSymbolIndex(MCSymbol const *Symbol) {
MCSection *Sec = getCurrentSectionOnly();
Sec->ensureMinAlignment(Align(4));
- insert(getContext().allocFragment<MCSymbolIdFragment>(Symbol));
+ newSpecialFragment<MCSymbolIdFragment>(Symbol);
getAssembler().registerSymbol(*Symbol);
}
void MCWinCOFFStreamer::emitCOFFSectionIndex(const MCSymbol *Symbol) {
visitUsedSymbol(*Symbol);
const MCSymbolRefExpr *SRE = MCSymbolRefExpr::create(Symbol, getContext());
+ ensureHeadroom(2);
addFixup(SRE, FK_SecRel_2);
appendContents(2, 0);
}
@@ -293,6 +293,7 @@ void MCWinCOFFStreamer::emitCOFFSecRel32(const MCSymbol *Symbol,
if (Offset)
MCE = MCBinaryExpr::createAdd(
MCE, MCConstantExpr::create(Offset, getContext()), getContext());
+ ensureHeadroom(4);
addFixup(MCE, FK_SecRel_4);
// Emit 4 bytes (zeros) to the object file.
appendContents(4, 0);
@@ -308,6 +309,7 @@ void MCWinCOFFStreamer::emitCOFFImgRel32(const MCSymbol *Symbol,
if (Offset)
MCE = MCBinaryExpr::createAdd(
MCE, MCConstantExpr::create(Offset, getContext()), getContext());
+ ensureHeadroom(4);
addFixup(MCE, FK_Data_4);
// Emit 4 bytes (zeros) to the object file.
appendContents(4, 0);
@@ -318,6 +320,7 @@ void MCWinCOFFStreamer::emitCOFFSecNumber(MCSymbol const *Symbol) {
// Create Symbol for section number.
const MCExpr *MCE = MCCOFFSectionNumberTargetExpr::create(
*Symbol, this->getWriter(), getContext());
+ ensureHeadroom(4);
addFixup(MCE, FK_Data_4);
// Emit 4 bytes (zeros) to the object file.
appendContents(4, 0);
@@ -328,6 +331,7 @@ void MCWinCOFFStreamer::emitCOFFSecOffset(MCSymbol const *Symbol) {
// Create Symbol for section offset.
const MCExpr *MCE =
MCCOFFSectionOffsetTargetExpr::create(*Symbol, getContext());
+ ensureHeadroom(4);
addFixup(MCE, FK_Data_4);
// Emit 4 bytes (zeros) to the object file.
appendContents(4, 0);
@@ -335,7 +339,7 @@ void MCWinCOFFStreamer::emitCOFFSecOffset(MCSymbol const *Symbol) {
void MCWinCOFFStreamer::emitCommonSymbol(MCSymbol *S, uint64_t Size,
Align ByteAlignment) {
- auto *Symbol = cast<MCSymbolCOFF>(S);
+ auto *Symbol = static_cast<MCSymbolCOFF *>(S);
const Triple &T = getContext().getTargetTriple();
if (T.isWindowsMSVCEnvironment()) {
@@ -367,7 +371,7 @@ void MCWinCOFFStreamer::emitCommonSymbol(MCSymbol *S, uint64_t Size,
void MCWinCOFFStreamer::emitLocalCommonSymbol(MCSymbol *S, uint64_t Size,
Align ByteAlignment) {
- auto *Symbol = cast<MCSymbolCOFF>(S);
+ auto *Symbol = static_cast<MCSymbolCOFF *>(S);
MCSection *Section = getContext().getObjectFileInfo()->getBSSSection();
pushSection();
@@ -382,7 +386,7 @@ void MCWinCOFFStreamer::emitLocalCommonSymbol(MCSymbol *S, uint64_t Size,
// Hack: Used by llvm-ml to implement the alias directive.
void MCWinCOFFStreamer::emitWeakReference(MCSymbol *AliasS,
const MCSymbol *Symbol) {
- auto *Alias = cast<MCSymbolCOFF>(AliasS);
+ auto *Alias = static_cast<MCSymbolCOFF *>(AliasS);
emitSymbolAttribute(Alias, MCSA_Weak);
Alias->setIsWeakExternal(true);
@@ -410,7 +414,7 @@ void MCWinCOFFStreamer::emitCGProfileEntry(const MCSymbolRefExpr *From,
void MCWinCOFFStreamer::finalizeCGProfileEntry(const MCSymbolRefExpr *&SRE) {
const MCSymbol *S = &SRE->getSymbol();
if (getAssembler().registerSymbol(*S))
- cast<MCSymbolCOFF>(S)->setExternal(true);
+ static_cast<const MCSymbolCOFF *>(S)->setExternal(true);
}
void MCWinCOFFStreamer::finishImpl() {
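The ensureHeadroom(N) calls added throughout this streamer keep each fixup and the N placeholder bytes it patches inside the same fragment, since fixup offsets are recorded relative to the current fragment. A minimal standalone sketch of that invariant with toy types (not the real MCObjectStreamer API):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

struct ToyFixup { std::size_t Offset; int Kind; };

struct ToyFragment {
  std::vector<uint8_t> Contents;
  std::vector<ToyFixup> Fixups;
  std::size_t Capacity = 16; // pretend fragments have a fixed capacity
};

struct ToyStreamer {
  std::vector<ToyFragment> Fragments;
  ToyStreamer() { Fragments.emplace_back(); }
  ToyFragment &cur() { return Fragments.back(); }

  // Start a new fragment unless the current one has room for N more bytes.
  void ensureHeadroom(std::size_t N) {
    if (cur().Contents.size() + N > cur().Capacity)
      Fragments.emplace_back();
  }

  // Fixup offsets are relative to the current fragment's contents.
  void addFixup(int Kind) {
    cur().Fixups.push_back({cur().Contents.size(), Kind});
  }

  void appendContents(std::size_t N, uint8_t Byte) {
    cur().Contents.insert(cur().Contents.end(), N, Byte);
  }

  void emitSecRel32() {
    ensureHeadroom(4); // keep the fixup and its 4 placeholder bytes together
    addFixup(/*FK_SecRel_4=*/1);
    appendContents(4, 0);
    assert(cur().Fixups.back().Offset + 4 <= cur().Contents.size());
  }
};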
diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp
index 26f45ce..a0e3dba 100644
--- a/llvm/lib/MC/MCXCOFFStreamer.cpp
+++ b/llvm/lib/MC/MCXCOFFStreamer.cpp
@@ -52,7 +52,7 @@ void MCXCOFFStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
bool MCXCOFFStreamer::emitSymbolAttribute(MCSymbol *Sym,
MCSymbolAttr Attribute) {
- auto *Symbol = cast<MCSymbolXCOFF>(Sym);
+ auto *Symbol = static_cast<MCSymbolXCOFF *>(Sym);
getAssembler().registerSymbol(*Symbol);
switch (Attribute) {
@@ -109,7 +109,7 @@ void MCXCOFFStreamer::emitXCOFFRefDirective(const MCSymbol *Symbol) {
void MCXCOFFStreamer::emitXCOFFRenameDirective(const MCSymbol *Name,
StringRef Rename) {
- const MCSymbolXCOFF *Symbol = cast<const MCSymbolXCOFF>(Name);
+ auto *Symbol = static_cast<const MCSymbolXCOFF *>(Name);
if (!Symbol->hasRename())
report_fatal_error("Only explicit .rename is supported for XCOFF.");
}
@@ -129,15 +129,14 @@ void MCXCOFFStreamer::emitXCOFFCInfoSym(StringRef Name, StringRef Metadata) {
void MCXCOFFStreamer::emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
Align ByteAlignment) {
+ auto *Sym = static_cast<MCSymbolXCOFF *>(Symbol);
getAssembler().registerSymbol(*Symbol);
- Symbol->setExternal(cast<MCSymbolXCOFF>(Symbol)->getStorageClass() !=
- XCOFF::C_HIDEXT);
+ Symbol->setExternal(Sym->getStorageClass() != XCOFF::C_HIDEXT);
Symbol->setCommon(Size, ByteAlignment);
// Default csect align is 4, but common symbols have explicit alignment values
// and we should honor them.
- cast<MCSymbolXCOFF>(Symbol)->getRepresentedCsect()->setAlignment(
- ByteAlignment);
+ Sym->getRepresentedCsect()->setAlignment(ByteAlignment);
// Emit the alignment and storage for the variable to the section.
emitValueToAlignment(ByteAlignment);
diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp
index e87696a..eb59e39 100644
--- a/llvm/lib/MC/MachObjectWriter.cpp
+++ b/llvm/lib/MC/MachObjectWriter.cpp
@@ -72,7 +72,7 @@ bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol &S) {
// References to weak definitions require external relocation entries; the
// definition may not always be the one in the same object file.
- if (cast<MCSymbolMachO>(S).isWeakDefinition())
+ if (static_cast<const MCSymbolMachO &>(S).isWeakDefinition())
return true;
// Otherwise, we can use an internal relocation.
@@ -383,15 +383,16 @@ const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const {
}
void MachObjectWriter::writeNlist(MachSymbolData &MSD, const MCAssembler &Asm) {
- const MCSymbol *Symbol = MSD.Symbol;
- const auto &Data = cast<MCSymbolMachO>(*Symbol);
- const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol);
+ auto *Symbol = static_cast<const MCSymbolMachO *>(MSD.Symbol);
+ const auto &Data = static_cast<const MCSymbolMachO &>(*Symbol);
+ auto *AliasedSymbol =
+ static_cast<const MCSymbolMachO *>(&findAliasedSymbol(*Symbol));
uint8_t SectionIndex = MSD.SectionIndex;
uint8_t Type = 0;
uint64_t Address = 0;
bool IsAlias = Symbol != AliasedSymbol;
- const MCSymbol &OrigSymbol = *Symbol;
+ const MCSymbolMachO &OrigSymbol = *Symbol;
MachSymbolData *AliaseeInfo;
if (IsAlias) {
AliaseeInfo = findSymbolData(*AliasedSymbol);
@@ -441,9 +442,8 @@ void MachObjectWriter::writeNlist(MachSymbolData &MSD, const MCAssembler &Asm) {
// The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
// value.
- bool EncodeAsAltEntry =
- IsAlias && cast<MCSymbolMachO>(OrigSymbol).isAltEntry();
- W.write<uint16_t>(cast<MCSymbolMachO>(Symbol)->getEncodedFlags(EncodeAsAltEntry));
+ bool EncodeAsAltEntry = IsAlias && OrigSymbol.isAltEntry();
+ W.write<uint16_t>(Symbol->getEncodedFlags(EncodeAsAltEntry));
if (is64Bit())
W.write<uint64_t>(Address);
else
@@ -570,7 +570,8 @@ void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
//
// FIXME: Do not hardcode.
if (Asm.registerSymbol(*ISD.Symbol))
- cast<MCSymbolMachO>(ISD.Symbol)->setReferenceTypeUndefinedLazy(true);
+ static_cast<MCSymbolMachO *>(ISD.Symbol)
+ ->setReferenceTypeUndefinedLazy(true);
}
}
@@ -588,7 +589,7 @@ void MachObjectWriter::computeSymbolTable(
// Build the string table.
for (const MCSymbol &Symbol : Asm.symbols()) {
- if (!cast<MCSymbolMachO>(Symbol).isSymbolLinkerVisible())
+ if (!static_cast<const MCSymbolMachO &>(Symbol).isSymbolLinkerVisible())
continue;
StringTable.add(Symbol.getName());
@@ -602,7 +603,7 @@ void MachObjectWriter::computeSymbolTable(
// important for letting us diff .o files.
for (const MCSymbol &Symbol : Asm.symbols()) {
// Ignore non-linker visible symbols.
- if (!cast<MCSymbolMachO>(Symbol).isSymbolLinkerVisible())
+ if (!static_cast<const MCSymbolMachO &>(Symbol).isSymbolLinkerVisible())
continue;
if (!Symbol.isExternal() && !Symbol.isUndefined())
@@ -628,7 +629,7 @@ void MachObjectWriter::computeSymbolTable(
// Now add the data for local symbols.
for (const MCSymbol &Symbol : Asm.symbols()) {
// Ignore non-linker visible symbols.
- if (!cast<MCSymbolMachO>(Symbol).isSymbolLinkerVisible())
+ if (!static_cast<const MCSymbolMachO &>(Symbol).isSymbolLinkerVisible())
continue;
if (Symbol.isExternal() || Symbol.isUndefined())
diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index bfd6334..af009a4 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp
@@ -487,7 +487,7 @@ void WasmObjectWriter::recordRelocation(const MCFragment &F,
bool IsLocRel = false;
if (const auto *RefB = Target.getSubSym()) {
- const auto &SymB = cast<MCSymbolWasm>(*RefB);
+ auto &SymB = static_cast<const MCSymbolWasm &>(*RefB);
if (FixupSection.isText()) {
Ctx.reportError(Fixup.getLoc(),
@@ -515,7 +515,7 @@ void WasmObjectWriter::recordRelocation(const MCFragment &F,
}
// We either rejected the fixup or folded B into C at this point.
- const auto *SymA = cast<MCSymbolWasm>(Target.getAddSym());
+ auto *SymA = static_cast<const MCSymbolWasm *>(Target.getAddSym());
// The .init_array isn't translated as data, so don't do relocations in it.
if (FixupSection.getName().starts_with(".init_array")) {
@@ -561,7 +561,7 @@ void WasmObjectWriter::recordRelocation(const MCFragment &F,
report_fatal_error("section symbol is required for relocation");
C += Asm->getSymbolOffset(*SymA);
- SymA = cast<MCSymbolWasm>(SectionSymbol);
+ SymA = static_cast<const MCSymbolWasm *>(SectionSymbol);
}
if (Type == wasm::R_WASM_TABLE_INDEX_REL_SLEB ||
@@ -573,7 +573,7 @@ void WasmObjectWriter::recordRelocation(const MCFragment &F,
// TABLE_INDEX relocs implicitly use the default indirect function table.
// We require the function table to have already been defined.
auto TableName = "__indirect_function_table";
- MCSymbolWasm *Sym = cast_or_null<MCSymbolWasm>(Ctx.lookupSymbol(TableName));
+ auto *Sym = static_cast<MCSymbolWasm *>(Ctx.lookupSymbol(TableName));
if (!Sym) {
report_fatal_error("missing indirect function table symbol");
} else {
@@ -631,8 +631,8 @@ WasmObjectWriter::getProvisionalValue(const MCAssembler &Asm,
case wasm::R_WASM_TABLE_INDEX_I32:
case wasm::R_WASM_TABLE_INDEX_I64: {
// Provisional value is table address of the resolved symbol itself
- const MCSymbolWasm *Base =
- cast<MCSymbolWasm>(Asm.getBaseSymbol(*RelEntry.Symbol));
+ auto *Base =
+ static_cast<const MCSymbolWasm *>(Asm.getBaseSymbol(*RelEntry.Symbol));
assert(Base->isFunction());
if (RelEntry.Type == wasm::R_WASM_TABLE_INDEX_REL_SLEB ||
RelEntry.Type == wasm::R_WASM_TABLE_INDEX_REL_SLEB64)
@@ -1342,11 +1342,11 @@ void WasmObjectWriter::prepareImports(
// Register types for all functions, including those with private linkage
// (because wasm always needs a type signature).
if (WS.isFunction()) {
- const auto *BS = Asm.getBaseSymbol(S);
+ auto *BS = static_cast<const MCSymbolWasm *>(Asm.getBaseSymbol(S));
if (!BS)
report_fatal_error(Twine(S.getName()) +
": absolute addressing not supported!");
- registerFunctionType(*cast<MCSymbolWasm>(BS));
+ registerFunctionType(*BS);
}
if (WS.isTag())
@@ -1516,10 +1516,10 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
// For user-defined custom sections, strip the prefix
Name.consume_front(".custom_section.");
- MCSymbol *Begin = Sec.getBeginSymbol();
+ auto *Begin = static_cast<MCSymbolWasm *>(Sec.getBeginSymbol());
if (Begin) {
- assert(WasmIndices.count(cast<MCSymbolWasm>(Begin)) == 0);
- WasmIndices[cast<MCSymbolWasm>(Begin)] = CustomSections.size();
+ assert(WasmIndices.count(Begin) == 0);
+ WasmIndices[Begin] = CustomSections.size();
}
// Separate out the producers and target features sections
@@ -1719,7 +1719,7 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
if (!BS)
report_fatal_error(Twine(S.getName()) +
": absolute addressing not supported!");
- const MCSymbolWasm *Base = cast<MCSymbolWasm>(BS);
+ const MCSymbolWasm *Base = static_cast<const MCSymbolWasm *>(BS);
// Find the target symbol of this weak alias and export that index
const auto &WS = static_cast<const MCSymbolWasm &>(S);
@@ -1829,8 +1829,8 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
Rel.Type != wasm::R_WASM_TABLE_INDEX_REL_SLEB64)
return;
assert(Rel.Symbol->isFunction());
- const MCSymbolWasm *Base =
- cast<MCSymbolWasm>(Asm.getBaseSymbol(*Rel.Symbol));
+ auto *Base =
+ static_cast<const MCSymbolWasm *>(Asm.getBaseSymbol(*Rel.Symbol));
uint32_t FunctionIndex = WasmIndices.find(Base)->second;
uint32_t TableIndex = TableElems.size() + InitialTableOffset;
if (TableIndices.try_emplace(Base, TableIndex).second) {
@@ -1880,7 +1880,8 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
if (!SymRef)
report_fatal_error(
"fixups in .init_array should be symbol references");
- const auto &TargetSym = cast<const MCSymbolWasm>(SymRef->getSymbol());
+ auto &TargetSym =
+ static_cast<const MCSymbolWasm &>(SymRef->getSymbol());
if (TargetSym.getIndex() == InvalidIndex)
report_fatal_error("symbols in .init_array should exist in symtab");
if (!TargetSym.isFunction())
@@ -1905,7 +1906,7 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
writeExportSection(Exports);
const MCSymbol *IndirectFunctionTable =
getContext().lookupSymbol("__indirect_function_table");
- writeElemSection(cast_or_null<const MCSymbolWasm>(IndirectFunctionTable),
+ writeElemSection(static_cast<const MCSymbolWasm *>(IndirectFunctionTable),
TableElems);
writeDataCountSection();
diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp
index 856850d..0cc5ff5 100644
--- a/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -382,7 +382,8 @@ void WinCOFFWriter::defineSymbol(const MCSymbol &MCSym) {
COFFSymbol *Sym = GetOrCreateCOFFSymbol(&MCSym);
COFFSymbol *Local = nullptr;
- if (cast<MCSymbolCOFF>(MCSym).getWeakExternalCharacteristics()) {
+ if (static_cast<const MCSymbolCOFF &>(MCSym)
+ .getWeakExternalCharacteristics()) {
Sym->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
Sym->Section = nullptr;
@@ -406,7 +407,8 @@ void WinCOFFWriter::defineSymbol(const MCSymbol &MCSym) {
Sym->Aux[0].AuxType = ATWeakExternal;
Sym->Aux[0].Aux.WeakExternal.TagIndex = 0; // Filled in later
Sym->Aux[0].Aux.WeakExternal.Characteristics =
- cast<MCSymbolCOFF>(MCSym).getWeakExternalCharacteristics();
+ static_cast<const MCSymbolCOFF &>(MCSym)
+ .getWeakExternalCharacteristics();
} else {
if (!Base)
Sym->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
@@ -418,7 +420,7 @@ void WinCOFFWriter::defineSymbol(const MCSymbol &MCSym) {
if (Local) {
Local->Data.Value = getSymbolValue(MCSym, *Asm);
- const MCSymbolCOFF &SymbolCOFF = cast<MCSymbolCOFF>(MCSym);
+ auto &SymbolCOFF = static_cast<const MCSymbolCOFF &>(MCSym);
Local->Data.Type = SymbolCOFF.getType();
Local->Data.StorageClass = SymbolCOFF.getClass();
@@ -821,7 +823,8 @@ void WinCOFFWriter::executePostLayoutBinding() {
for (const MCSymbol &Symbol : Asm->symbols())
// Define non-temporary or temporary static (private-linkage) symbols
if (!Symbol.isTemporary() ||
- cast<MCSymbolCOFF>(Symbol).getClass() == COFF::IMAGE_SYM_CLASS_STATIC)
+ static_cast<const MCSymbolCOFF &>(Symbol).getClass() ==
+ COFF::IMAGE_SYM_CLASS_STATIC)
defineSymbol(Symbol);
UseBigObj = Sections.size() > COFF::MaxNumberOfSections16;
@@ -1188,7 +1191,7 @@ bool WinCOFFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
// point to thunks, and the /GUARD:CF flag assumes that it can use relocations
// to approximate the set of all address taken functions. LLD's implementation
// of /GUARD:CF also relies on the existence of these relocations.
- uint16_t Type = cast<MCSymbolCOFF>(SymA).getType();
+ uint16_t Type = static_cast<const MCSymbolCOFF &>(SymA).getType();
if ((Type >> COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION)
return false;
return &SymA.getSection() == FB.getParent();
diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index 65f543b..13917ba 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -591,7 +591,7 @@ void XCOFFWriter::executePostLayoutBinding() {
if (S.isTemporary())
continue;
- const MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(&S);
+ auto *XSym = static_cast<const MCSymbolXCOFF *>(&S);
const MCSectionXCOFF *ContainingCsect = getContainingCsect(XSym);
if (ContainingCsect->isDwarfSect())
@@ -690,7 +690,8 @@ void XCOFFWriter::recordRelocation(const MCFragment &F, const MCFixup &Fixup,
std::tie(Type, SignAndSize) = TargetObjectWriter->getRelocTypeAndSignSize(
Target, Fixup, Fixup.isPCRel());
- const MCSectionXCOFF *SymASec = getContainingCsect(cast<MCSymbolXCOFF>(SymA));
+ const MCSectionXCOFF *SymASec =
+ getContainingCsect(static_cast<const MCSymbolXCOFF *>(SymA));
assert(SectionMap.contains(SymASec) &&
"Expected containing csect to exist in map.");
@@ -773,13 +774,13 @@ void XCOFFWriter::recordRelocation(const MCFragment &F, const MCFixup &Fixup,
"Expected containing csect to exist in map.");
SectionMap[RelocationSec]->Relocations.push_back(Reloc);
- const MCSymbol *const SymB = Target.getSubSym();
+ auto SymB = static_cast<const MCSymbolXCOFF *>(Target.getSubSym());
if (!SymB)
return;
if (SymA == SymB)
report_fatal_error("relocation for opposite term is not yet supported");
- const MCSectionXCOFF *SymBSec = getContainingCsect(cast<MCSymbolXCOFF>(SymB));
+ const MCSectionXCOFF *SymBSec = getContainingCsect(SymB);
assert(SectionMap.contains(SymBSec) &&
"Expected containing csect to exist in map.");
if (SymASec == SymBSec)
diff --git a/llvm/lib/Object/Binary.cpp b/llvm/lib/Object/Binary.cpp
index 2dfae8a..da2a7bb 100644
--- a/llvm/lib/Object/Binary.cpp
+++ b/llvm/lib/Object/Binary.cpp
@@ -75,6 +75,7 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
case file_magic::xcoff_object_32:
case file_magic::xcoff_object_64:
case file_magic::wasm_object:
+ case file_magic::dxcontainer_object:
return ObjectFile::createSymbolicFile(Buffer, Type, Context, InitContent);
case file_magic::macho_universal_binary:
return MachOUniversalBinary::create(Buffer);
@@ -87,7 +88,6 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
case file_magic::clang_ast:
case file_magic::cuda_fatbinary:
case file_magic::coff_cl_gl_object:
- case file_magic::dxcontainer_object:
case file_magic::offload_bundle:
case file_magic::offload_bundle_compressed:
case file_magic::spirv_object:
diff --git a/llvm/lib/Object/DXContainer.cpp b/llvm/lib/Object/DXContainer.cpp
index 0b46ff7..031b941 100644
--- a/llvm/lib/Object/DXContainer.cpp
+++ b/llvm/lib/Object/DXContainer.cpp
@@ -11,6 +11,7 @@
#include "llvm/Object/Error.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
using namespace llvm;
using namespace llvm::object;
@@ -515,3 +516,183 @@ uint8_t DirectX::PSVRuntimeInfo::getSigPatchOrPrimCount() const {
return P->SigPatchOrPrimElements;
return 0;
}
+
+class DXNotSupportedError : public ErrorInfo<DXNotSupportedError> {
+public:
+ static char ID;
+
+ DXNotSupportedError(StringRef S) : FeatureString(S) {}
+
+ void log(raw_ostream &OS) const override {
+ OS << "DXContainer does not support " << FeatureString;
+ }
+
+ std::error_code convertToErrorCode() const override {
+ return inconvertibleErrorCode();
+ }
+
+private:
+ StringRef FeatureString;
+};
+
+char DXNotSupportedError::ID = 0;
+
+Expected<section_iterator>
+DXContainerObjectFile::getSymbolSection(DataRefImpl Symb) const {
+ return make_error<DXNotSupportedError>("Symbol sections");
+}
+
+Expected<StringRef> DXContainerObjectFile::getSymbolName(DataRefImpl) const {
+ return make_error<DXNotSupportedError>("Symbol names");
+}
+
+Expected<uint64_t>
+DXContainerObjectFile::getSymbolAddress(DataRefImpl Symb) const {
+ return make_error<DXNotSupportedError>("Symbol addresses");
+}
+
+uint64_t DXContainerObjectFile::getSymbolValueImpl(DataRefImpl Symb) const {
+ llvm_unreachable("DXContainer does not support symbols");
+}
+uint64_t
+DXContainerObjectFile::getCommonSymbolSizeImpl(DataRefImpl Symb) const {
+ llvm_unreachable("DXContainer does not support symbols");
+}
+
+Expected<SymbolRef::Type>
+DXContainerObjectFile::getSymbolType(DataRefImpl Symb) const {
+ return make_error<DXNotSupportedError>("Symbol types");
+}
+
+void DXContainerObjectFile::moveSectionNext(DataRefImpl &Sec) const {
+ PartIterator It = reinterpret_cast<PartIterator>(Sec.p);
+ if (It == Parts.end())
+ return;
+
+ ++It;
+ Sec.p = reinterpret_cast<uintptr_t>(It);
+}
+
+Expected<StringRef>
+DXContainerObjectFile::getSectionName(DataRefImpl Sec) const {
+ PartIterator It = reinterpret_cast<PartIterator>(Sec.p);
+ return StringRef(It->Part.getName());
+}
+
+uint64_t DXContainerObjectFile::getSectionAddress(DataRefImpl Sec) const {
+ PartIterator It = reinterpret_cast<PartIterator>(Sec.p);
+ return It->Offset;
+}
+
+uint64_t DXContainerObjectFile::getSectionIndex(DataRefImpl Sec) const {
+ return (Sec.p - reinterpret_cast<uintptr_t>(Parts.begin())) /
+ sizeof(PartIterator);
+}
+
+uint64_t DXContainerObjectFile::getSectionSize(DataRefImpl Sec) const {
+ PartIterator It = reinterpret_cast<PartIterator>(Sec.p);
+ return It->Data.size();
+}
+Expected<ArrayRef<uint8_t>>
+DXContainerObjectFile::getSectionContents(DataRefImpl Sec) const {
+ PartIterator It = reinterpret_cast<PartIterator>(Sec.p);
+ return ArrayRef<uint8_t>(It->Data.bytes_begin(), It->Data.size());
+}
+
+uint64_t DXContainerObjectFile::getSectionAlignment(DataRefImpl Sec) const {
+ return 1;
+}
+
+bool DXContainerObjectFile::isSectionCompressed(DataRefImpl Sec) const {
+ return false;
+}
+
+bool DXContainerObjectFile::isSectionText(DataRefImpl Sec) const {
+ return false;
+}
+
+bool DXContainerObjectFile::isSectionData(DataRefImpl Sec) const {
+ return false;
+}
+
+bool DXContainerObjectFile::isSectionBSS(DataRefImpl Sec) const {
+ return false;
+}
+
+bool DXContainerObjectFile::isSectionVirtual(DataRefImpl Sec) const {
+ return false;
+}
+
+relocation_iterator
+DXContainerObjectFile::section_rel_begin(DataRefImpl Sec) const {
+ return relocation_iterator(RelocationRef());
+}
+
+relocation_iterator
+DXContainerObjectFile::section_rel_end(DataRefImpl Sec) const {
+ return relocation_iterator(RelocationRef());
+}
+
+void DXContainerObjectFile::moveRelocationNext(DataRefImpl &Rel) const {
+ llvm_unreachable("DXContainer does not support relocations");
+}
+
+uint64_t DXContainerObjectFile::getRelocationOffset(DataRefImpl Rel) const {
+ llvm_unreachable("DXContainer does not support relocations");
+}
+
+symbol_iterator
+DXContainerObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
+ return symbol_iterator(SymbolRef());
+}
+
+uint64_t DXContainerObjectFile::getRelocationType(DataRefImpl Rel) const {
+ llvm_unreachable("DXContainer does not support relocations");
+}
+
+void DXContainerObjectFile::getRelocationTypeName(
+ DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
+ llvm_unreachable("DXContainer does not support relocations");
+}
+
+section_iterator DXContainerObjectFile::section_begin() const {
+ DataRefImpl Sec;
+ Sec.p = reinterpret_cast<uintptr_t>(Parts.begin());
+ return section_iterator(SectionRef(Sec, this));
+}
+section_iterator DXContainerObjectFile::section_end() const {
+ DataRefImpl Sec;
+ Sec.p = reinterpret_cast<uintptr_t>(Parts.end());
+ return section_iterator(SectionRef(Sec, this));
+}
+
+uint8_t DXContainerObjectFile::getBytesInAddress() const { return 4; }
+
+StringRef DXContainerObjectFile::getFileFormatName() const {
+ return "DirectX Container";
+}
+
+Triple::ArchType DXContainerObjectFile::getArch() const { return Triple::dxil; }
+
+Expected<SubtargetFeatures> DXContainerObjectFile::getFeatures() const {
+ return SubtargetFeatures();
+}
+
+Error DXContainerObjectFile::printSymbolName(raw_ostream &OS,
+ DataRefImpl Symb) const {
+ return make_error<DXNotSupportedError>("Symbol names");
+}
+
+Expected<uint32_t>
+DXContainerObjectFile::getSymbolFlags(DataRefImpl Symb) const {
+ return make_error<DXNotSupportedError>("Symbol flags");
+}
+
+Expected<std::unique_ptr<DXContainerObjectFile>>
+ObjectFile::createDXContainerObjectFile(MemoryBufferRef Object) {
+ auto ExC = DXContainer::create(Object);
+ if (!ExC)
+ return ExC.takeError();
+ std::unique_ptr<DXContainerObjectFile> Obj(new DXContainerObjectFile(*ExC));
+ return std::move(Obj);
+}
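With DXContainer files now routed through the generic object-file factories, a tool can open one via the usual ObjectFile interface and walk its parts as sections. A hedged usage sketch; the dumpParts helper and the input path are illustrative and not part of this patch:

#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

// List the container parts of a (hypothetical) compiled shader.
static Error dumpParts(StringRef Path) {
  Expected<OwningBinary<ObjectFile>> ObjOrErr =
      ObjectFile::createObjectFile(Path);
  if (!ObjOrErr)
    return ObjOrErr.takeError();
  ObjectFile &Obj = *ObjOrErr->getBinary();
  outs() << Obj.getFileFormatName() << "\n"; // "DirectX Container"
  for (const SectionRef &Sec : Obj.sections()) {
    Expected<StringRef> Name = Sec.getName(); // DXContainer part name
    if (!Name)
      return Name.takeError();
    outs() << *Name << " size=" << Sec.getSize() << "\n";
  }
  return Error::success();
}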
diff --git a/llvm/lib/Object/Object.cpp b/llvm/lib/Object/Object.cpp
index c62944a..112927e 100644
--- a/llvm/lib/Object/Object.cpp
+++ b/llvm/lib/Object/Object.cpp
@@ -124,6 +124,8 @@ LLVMBinaryType LLVMBinaryGetType(LLVMBinaryRef BR) {
return LLVMBinaryTypeOffload;
case ID_Wasm:
return LLVMBinaryTypeWasm;
+ case ID_DXContainer:
+ return LLVMBinaryTypeDXcontainer;
case ID_StartObjects:
case ID_EndObjects:
llvm_unreachable("Marker types are not valid binary kinds!");
diff --git a/llvm/lib/Object/ObjectFile.cpp b/llvm/lib/Object/ObjectFile.cpp
index 6a226a3..b0e4ea0 100644
--- a/llvm/lib/Object/ObjectFile.cpp
+++ b/llvm/lib/Object/ObjectFile.cpp
@@ -15,6 +15,7 @@
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
+#include "llvm/Object/DXContainer.h"
#include "llvm/Object/Error.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/Wasm.h"
@@ -165,7 +166,6 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type,
case file_magic::goff_object:
case file_magic::cuda_fatbinary:
case file_magic::offload_binary:
- case file_magic::dxcontainer_object:
case file_magic::offload_bundle:
case file_magic::offload_bundle_compressed:
case file_magic::spirv_object:
@@ -201,6 +201,8 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type,
return createXCOFFObjectFile(Object, Binary::ID_XCOFF64);
case file_magic::wasm_object:
return createWasmObjectFile(Object);
+ case file_magic::dxcontainer_object:
+ return createDXContainerObjectFile(Object);
}
llvm_unreachable("Unexpected Object File Type");
}
diff --git a/llvm/lib/Object/SymbolicFile.cpp b/llvm/lib/Object/SymbolicFile.cpp
index e87ecb1..47295e6 100644
--- a/llvm/lib/Object/SymbolicFile.cpp
+++ b/llvm/lib/Object/SymbolicFile.cpp
@@ -68,6 +68,7 @@ SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type,
case file_magic::xcoff_object_32:
case file_magic::xcoff_object_64:
case file_magic::wasm_object:
+ case file_magic::dxcontainer_object:
return ObjectFile::createObjectFile(Object, Type, InitContent);
case file_magic::coff_import_library:
return std::unique_ptr<SymbolicFile>(new COFFImportFile(Object));
@@ -123,6 +124,7 @@ bool SymbolicFile::isSymbolicFile(file_magic Type, const LLVMContext *Context) {
case file_magic::elf_relocatable:
case file_magic::macho_object:
case file_magic::coff_object:
+ case file_magic::dxcontainer_object:
return true;
default:
return false;
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index bd3964c..5425729 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1160,8 +1160,7 @@ void getValueForSiteInstrProf(const void *R, InstrProfValueData *Dst,
}
ValueProfData *allocValueProfDataInstrProf(size_t TotalSizeInBytes) {
- ValueProfData *VD =
- (ValueProfData *)(new (::operator new(TotalSizeInBytes)) ValueProfData());
+ ValueProfData *VD = new (::operator new(TotalSizeInBytes)) ValueProfData();
memset(VD, 0, TotalSizeInBytes);
return VD;
}
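The simplification above leans on placement new already producing a typed pointer; a minimal standalone illustration with a hypothetical Header type (not the ValueProfData layout):

#include <cstddef>
#include <cstring>
#include <new>

struct Header { unsigned NumEntries; };

// Placement new over raw storage yields a Header *, so no extra cast is
// needed before zeroing the header plus its trailing payload.
Header *allocWithTail(std::size_t TotalSizeInBytes) {
  void *Raw = ::operator new(TotalSizeInBytes);
  Header *H = new (Raw) Header();
  std::memset(H, 0, TotalSizeInBytes);
  return H;
}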
diff --git a/llvm/lib/SandboxIR/Value.cpp b/llvm/lib/SandboxIR/Value.cpp
index e39bbc4..94b4a4c 100644
--- a/llvm/lib/SandboxIR/Value.cpp
+++ b/llvm/lib/SandboxIR/Value.cpp
@@ -22,7 +22,7 @@ Value::Value(ClassID SubclassID, llvm::Value *Val, Context &Ctx)
Value::use_iterator Value::use_begin() {
llvm::Use *LLVMUse = nullptr;
- if (Val->use_begin() != Val->use_end())
+ if (!Val->uses().empty())
LLVMUse = &*Val->use_begin();
User *User = LLVMUse != nullptr ? cast_or_null<sandboxir::User>(Ctx.getValue(
Val->use_begin()->getUser()))
diff --git a/llvm/lib/Support/BalancedPartitioning.cpp b/llvm/lib/Support/BalancedPartitioning.cpp
index ed3b149..1914f4c 100644
--- a/llvm/lib/Support/BalancedPartitioning.cpp
+++ b/llvm/lib/Support/BalancedPartitioning.cpp
@@ -306,7 +306,7 @@ void BalancedPartitioning::split(const FunctionNodeRange Nodes,
unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
auto NodesMid = Nodes.begin() + (NumNodes + 1) / 2;
- llvm::sort(Nodes.begin(), Nodes.end(), [](auto &L, auto &R) {
+ llvm::sort(Nodes, [](auto &L, auto &R) {
return L.InputOrderIndex < R.InputOrderIndex;
});
diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index e8d3161..082de56 100644
--- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -597,6 +597,14 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
return Thunk;
}
+std::optional<std::string> getArm64ECMangledFunctionName(GlobalValue &GV) {
+ if (!GV.hasName()) {
+ GV.setName("__unnamed");
+ }
+
+ return llvm::getArm64ECMangledFunctionName(GV.getName());
+}
+
// Builds the "guest exit thunk", a helper to call a function which may or may
// not be an exit thunk. (We optimistically assume non-dllimport function
// declarations refer to functions defined in AArch64 code; if the linker
@@ -608,7 +616,7 @@ Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) {
getThunkType(F->getFunctionType(), F->getAttributes(),
Arm64ECThunkType::GuestExit, NullThunkName, Arm64Ty, X64Ty,
ArgTranslations);
- auto MangledName = getArm64ECMangledFunctionName(F->getName().str());
+ auto MangledName = getArm64ECMangledFunctionName(*F);
assert(MangledName && "Can't guest exit to function that's already native");
std::string ThunkName = *MangledName;
if (ThunkName[0] == '?' && ThunkName.find("@") != std::string::npos) {
@@ -790,7 +798,7 @@ bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) {
if (!F)
continue;
if (std::optional<std::string> MangledName =
- getArm64ECMangledFunctionName(A.getName().str())) {
+ getArm64ECMangledFunctionName(A)) {
F->addMetadata("arm64ec_unmangled_name",
*MDNode::get(M->getContext(),
MDString::get(M->getContext(), A.getName())));
@@ -807,7 +815,7 @@ bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) {
cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts());
if (PersFn->getValueType() && PersFn->getValueType()->isFunctionTy()) {
if (std::optional<std::string> MangledName =
- getArm64ECMangledFunctionName(PersFn->getName().str())) {
+ getArm64ECMangledFunctionName(*PersFn)) {
PersFn->setName(MangledName.value());
}
}
@@ -821,7 +829,7 @@ bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) {
// Rename hybrid patchable functions and change callers to use a global
// alias instead.
if (std::optional<std::string> MangledName =
- getArm64ECMangledFunctionName(F.getName().str())) {
+ getArm64ECMangledFunctionName(F)) {
std::string OrigName(F.getName());
F.setName(MangledName.value() + HybridPatchableTargetSuffix);
@@ -927,7 +935,7 @@ bool AArch64Arm64ECCallLowering::processFunction(
// FIXME: Handle functions with weak linkage?
if (!F.hasLocalLinkage() || F.hasAddressTaken()) {
if (std::optional<std::string> MangledName =
- getArm64ECMangledFunctionName(F.getName().str())) {
+ getArm64ECMangledFunctionName(F)) {
F.addMetadata("arm64ec_unmangled_name",
*MDNode::get(M->getContext(),
MDString::get(M->getContext(), F.getName())));
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index ea63edd8..8887657 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -887,6 +887,10 @@ private:
bool shouldScalarizeBinop(SDValue VecOp) const override {
return VecOp.getOpcode() == ISD::SETCC;
}
+
+ bool hasMultipleConditionRegisters(EVT VT) const override {
+ return VT.isScalableVector();
+ }
};
namespace AArch64 {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index ba7cbcc..5a537f2 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -6484,7 +6484,9 @@ class BaseSIMDThreeSameVectorDot<bit Q, bit U, bits<2> sz, bits<4> opc, string a
(OpNode (AccumType RegType:$Rd),
(InputType RegType:$Rn),
(InputType RegType:$Rm)))]> {
- let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}");
+
+ let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 #
+ "|" # kind1 # "\t$Rd, $Rn, $Rm}");
}
multiclass SIMDThreeSameVectorDot<bit U, bit Mixed, string asm, SDPatternOperator OpNode> {
@@ -6507,7 +6509,8 @@ class BaseSIMDThreeSameVectorFML<bit Q, bit U, bit b13, bits<3> size, string asm
(OpNode (AccumType RegType:$Rd),
(InputType RegType:$Rn),
(InputType RegType:$Rm)))]> {
- let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}");
+ let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 #
+ "|" # kind1 # "\t$Rd, $Rn, $Rm}");
let Inst{13} = b13;
}
@@ -8986,7 +8989,8 @@ class BaseSIMDThreeSameVectorBFDot<bit Q, bit U, string asm, string kind1,
(InputType RegType:$Rm)))]> {
let AsmString = !strconcat(asm,
"{\t$Rd" # kind1 # ", $Rn" # kind2 #
- ", $Rm" # kind2 # "}");
+ ", $Rm" # kind2 #
+ "|" # kind1 # "\t$Rd, $Rn, $Rm}");
}
multiclass SIMDThreeSameVectorBFDot<bit U, string asm> {
@@ -9032,7 +9036,7 @@ class SIMDBF16MLAL<bit Q, string asm, SDPatternOperator OpNode>
[(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
(v8bf16 V128:$Rn),
(v8bf16 V128:$Rm)))]> {
- let AsmString = !strconcat(asm, "{\t$Rd.4s, $Rn.8h, $Rm.8h}");
+ let AsmString = !strconcat(asm, "{\t$Rd.4s, $Rn.8h, $Rm.8h|.4s\t$Rd, $Rn, $Rm}");
}
let mayRaiseFPException = 1, Uses = [FPCR] in
@@ -9071,8 +9075,7 @@ class SIMDThreeSameVectorBF16MatrixMul<string asm>
(int_aarch64_neon_bfmmla (v4f32 V128:$Rd),
(v8bf16 V128:$Rn),
(v8bf16 V128:$Rm)))]> {
- let AsmString = !strconcat(asm, "{\t$Rd", ".4s", ", $Rn", ".8h",
- ", $Rm", ".8h", "}");
+ let AsmString = !strconcat(asm, "{\t$Rd.4s, $Rn.8h, $Rm.8h|.4s\t$Rd, $Rn, $Rm}");
}
let mayRaiseFPException = 1, Uses = [FPCR] in
@@ -9143,7 +9146,7 @@ class SIMDThreeSameVectorMatMul<bit B, bit U, string asm, SDPatternOperator OpNo
[(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd),
(v16i8 V128:$Rn),
(v16i8 V128:$Rm)))]> {
- let AsmString = asm # "{\t$Rd.4s, $Rn.16b, $Rm.16b}";
+ let AsmString = asm # "{\t$Rd.4s, $Rn.16b, $Rm.16b|.4s\t$Rd, $Rn, $Rm}";
}
//----------------------------------------------------------------------------
@@ -13344,8 +13347,8 @@ multiclass AtomicFPStore<bit R, bits<3> op0, string asm> {
class BaseSIMDThreeSameVectorFP8MatrixMul<string asm, bits<2> size, string kind>
: BaseSIMDThreeSameVectorTied<1, 1, {size, 0}, 0b11101,
V128, asm, ".16b", []> {
- let AsmString = !strconcat(asm, "{\t$Rd", kind, ", $Rn", ".16b",
- ", $Rm", ".16b", "}");
+ let AsmString = !strconcat(asm, "{\t$Rd", kind, ", $Rn.16b, $Rm.16b",
+ "|", kind, "\t$Rd, $Rn, $Rm}");
}
multiclass SIMDThreeSameVectorFP8MatrixMul<string asm>{
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index e1adc0b..9f05add 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3092,6 +3092,13 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
return AdjustCost(
BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
+ // We do not yet lower fptrunc f64->bf16 when only SVE1 is available, as the
+ // lowering uses fcvtx from SVE2. Give such casts an invalid cost.
+ if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&
+ ISD == ISD::FP_ROUND && SrcTy.isScalableVector() &&
+ DstTy.getScalarType() == MVT::bf16 && SrcTy.getScalarType() == MVT::f64)
+ return InstructionCost::getInvalid();
+
static const TypeConversionCostTblEntry BF16Tbl[] = {
{ISD::FP_ROUND, MVT::bf16, MVT::f32, 1}, // bfcvt
{ISD::FP_ROUND, MVT::bf16, MVT::f64, 1}, // bfcvt
@@ -3100,6 +3107,12 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 2}, // bfcvtn+fcvtn
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 3}, // fcvtn+fcvtl2+bfcvtn
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+bfcvtn
+ {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 1}, // bfcvt
+ {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 1}, // bfcvt
+ {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 3}, // bfcvt+bfcvt+uzp1
+ {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 2}, // fcvtx+bfcvt
+ {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 5}, // 2*fcvtx+2*bfcvt+uzp1
+ {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f64, 11}, // 4*fcvt+4*bfcvt+3*uzp
};
if (ST->hasBF16())
@@ -3508,11 +3521,21 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f32, 1},
{ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f32, 3},
+ // Truncate from nxvmf32 to nxvmbf16.
+ {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 8},
+ {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 8},
+ {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 17},
+
// Truncate from nxvmf64 to nxvmf16.
{ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f64, 1},
{ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f64, 3},
{ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f64, 7},
+ // Truncate from nxvmf64 to nxvmbf16.
+ {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 9},
+ {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 19},
+ {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f64, 39},
+
// Truncate from nxvmf64 to nxvmf32.
{ISD::FP_ROUND, MVT::nxv2f32, MVT::nxv2f64, 1},
{ISD::FP_ROUND, MVT::nxv4f32, MVT::nxv4f64, 3},
@@ -3523,11 +3546,21 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
{ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},
+ // Extend from nxvmbf16 to nxvmf32.
+ {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2bf16, 1}, // lsl
+ {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4bf16, 1}, // lsl
+ {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8bf16, 4}, // unpck+unpck+lsl+lsl
+
// Extend from nxvmf16 to nxvmf64.
{ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
{ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
{ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},
+ // Extend from nxvmbf16 to nxvmf64.
+ {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2bf16, 2}, // lsl+fcvt
+ {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4bf16, 6}, // 2*unpck+2*lsl+2*fcvt
+ {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8bf16, 14}, // 6*unpck+4*lsl+4*fcvt
+
// Extend from nxvmf32 to nxvmf64.
{ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
{ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
@@ -4282,10 +4315,9 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
TTI::OperandValueInfo Op2Info, const Instruction *I) const {
- int ISD = TLI->InstructionOpcodeToISD(Opcode);
// We don't lower some vector selects well that are wider than the register
// width. TODO: Improve this with different cost kinds.
- if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) {
+ if (isa<FixedVectorType>(ValTy) && Opcode == Instruction::Select) {
// We would need this many instructions to hide the scalarization happening.
const int AmortizationCost = 20;
@@ -4315,55 +4347,72 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
return LT.first;
}
- static const TypeConversionCostTblEntry
- VectorSelectTbl[] = {
- { ISD::SELECT, MVT::v2i1, MVT::v2f32, 2 },
- { ISD::SELECT, MVT::v2i1, MVT::v2f64, 2 },
- { ISD::SELECT, MVT::v4i1, MVT::v4f32, 2 },
- { ISD::SELECT, MVT::v4i1, MVT::v4f16, 2 },
- { ISD::SELECT, MVT::v8i1, MVT::v8f16, 2 },
- { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
- { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
- { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
- { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
- { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
- { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
- };
+ static const TypeConversionCostTblEntry VectorSelectTbl[] = {
+ {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
+ {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
+ {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
+ {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
+ {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
+ {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
+ {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
+ {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
+ {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
+ {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
+ {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};
EVT SelCondTy = TLI->getValueType(DL, CondTy);
EVT SelValTy = TLI->getValueType(DL, ValTy);
if (SelCondTy.isSimple() && SelValTy.isSimple()) {
- if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
+ if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, Opcode,
SelCondTy.getSimpleVT(),
SelValTy.getSimpleVT()))
return Entry->Cost;
}
}
- if (isa<FixedVectorType>(ValTy) && ISD == ISD::SETCC) {
- Type *ValScalarTy = ValTy->getScalarType();
- if ((ValScalarTy->isHalfTy() && !ST->hasFullFP16()) ||
- ValScalarTy->isBFloatTy()) {
- auto *ValVTy = cast<FixedVectorType>(ValTy);
-
- // Without dedicated instructions we promote [b]f16 compares to f32.
- auto *PromotedTy =
- VectorType::get(Type::getFloatTy(ValTy->getContext()), ValVTy);
-
- InstructionCost Cost = 0;
- // Promote operands to float vectors.
- Cost += 2 * getCastInstrCost(Instruction::FPExt, PromotedTy, ValTy,
- TTI::CastContextHint::None, CostKind);
- // Compare float vectors.
+ if (Opcode == Instruction::FCmp) {
+ // Without dedicated instructions we promote f16 + bf16 compares to f32.
+ if ((!ST->hasFullFP16() && ValTy->getScalarType()->isHalfTy()) ||
+ ValTy->getScalarType()->isBFloatTy()) {
+ Type *PromotedTy =
+ ValTy->getWithNewType(Type::getFloatTy(ValTy->getContext()));
+ InstructionCost Cost =
+ getCastInstrCost(Instruction::FPExt, PromotedTy, ValTy,
+ TTI::CastContextHint::None, CostKind);
+ if (!Op1Info.isConstant() && !Op2Info.isConstant())
+ Cost *= 2;
Cost += getCmpSelInstrCost(Opcode, PromotedTy, CondTy, VecPred, CostKind,
Op1Info, Op2Info);
- // During codegen we'll truncate the vector result from i32 to i16.
- Cost +=
- getCastInstrCost(Instruction::Trunc, VectorType::getInteger(ValVTy),
- VectorType::getInteger(PromotedTy),
- TTI::CastContextHint::None, CostKind);
+ if (ValTy->isVectorTy())
+ Cost += getCastInstrCost(
+ Instruction::Trunc, VectorType::getInteger(cast<VectorType>(ValTy)),
+ VectorType::getInteger(cast<VectorType>(PromotedTy)),
+ TTI::CastContextHint::None, CostKind);
return Cost;
}
+
+ auto LT = getTypeLegalizationCost(ValTy);
+ // Model unknown fp compares as a libcall.
+ if (LT.second.getScalarType() != MVT::f64 &&
+ LT.second.getScalarType() != MVT::f32 &&
+ LT.second.getScalarType() != MVT::f16)
+ return LT.first * getCallInstrCost(/*Function*/ nullptr, ValTy,
+ {ValTy, ValTy}, CostKind);
+
+ // Some comparison operators require expanding to multiple compares + or.
+ unsigned Factor = 1;
+ if (!CondTy->isVectorTy() &&
+ (VecPred == FCmpInst::FCMP_ONE || VecPred == FCmpInst::FCMP_UEQ))
+ Factor = 2; // fcmp with 2 selects
+ else if (isa<FixedVectorType>(ValTy) &&
+ (VecPred == FCmpInst::FCMP_ONE || VecPred == FCmpInst::FCMP_UEQ ||
+ VecPred == FCmpInst::FCMP_ORD || VecPred == FCmpInst::FCMP_UNO))
+ Factor = 3; // fcmxx+fcmyy+or
+ else if (isa<ScalableVectorType>(ValTy) &&
+ (VecPred == FCmpInst::FCMP_ONE || VecPred == FCmpInst::FCMP_UEQ))
+ Factor = 3; // fcmxx+fcmyy+or
+
+ return Factor * (CostKind == TTI::TCK_Latency ? 2 : LT.first);
}
// Treat the icmp in icmp(and, 0) or icmp(and, -1/1) when it can be folded to
@@ -4371,7 +4420,7 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
// comparison is not unsigned. FIXME: Enable for non-throughput cost kinds
// providing it will not cause performance regressions.
if (CostKind == TTI::TCK_RecipThroughput && ValTy->isIntegerTy() &&
- ISD == ISD::SETCC && I && !CmpInst::isUnsigned(VecPred) &&
+ Opcode == Instruction::ICmp && I && !CmpInst::isUnsigned(VecPred) &&
TLI->isTypeLegal(TLI->getValueType(DL, ValTy)) &&
match(I->getOperand(0), m_And(m_Value(), m_Value()))) {
if (match(I->getOperand(1), m_Zero()))
@@ -6235,10 +6284,17 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
}
}
- auto ShouldSinkCondition = [](Value *Cond) -> bool {
+ auto ShouldSinkCondition = [](Value *Cond,
+ SmallVectorImpl<Use *> &Ops) -> bool {
+ if (!isa<IntrinsicInst>(Cond))
+ return false;
auto *II = dyn_cast<IntrinsicInst>(Cond);
- return II && II->getIntrinsicID() == Intrinsic::vector_reduce_or &&
- isa<ScalableVectorType>(II->getOperand(0)->getType());
+ if (II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
+ !isa<ScalableVectorType>(II->getOperand(0)->getType()))
+ return false;
+ if (isa<CmpInst>(II->getOperand(0)))
+ Ops.push_back(&II->getOperandUse(0));
+ return true;
};
switch (I->getOpcode()) {
@@ -6254,7 +6310,7 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
}
break;
case Instruction::Select: {
- if (!ShouldSinkCondition(I->getOperand(0)))
+ if (!ShouldSinkCondition(I->getOperand(0), Ops))
return false;
Ops.push_back(&I->getOperandUse(0));
@@ -6264,7 +6320,7 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
if (cast<BranchInst>(I)->isUnconditional())
return false;
- if (!ShouldSinkCondition(cast<BranchInst>(I)->getCondition()))
+ if (!ShouldSinkCondition(cast<BranchInst>(I)->getCondition(), Ops))
return false;
Ops.push_back(&I->getOperandUse(0));
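As a worked reading of the new fcmp cost path above, assuming <4 x float> legalizes in a single step so LT.first is 1: fcmp one on a fixed vector gets Factor = 3 (two compares plus an or), giving

cost = Factor * LT.first = 3 * 1 = 3   (TCK_RecipThroughput)
cost = Factor * 2        = 3 * 2 = 6   (TCK_Latency)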
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 6912caf..7a2b679 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -79,8 +79,7 @@ public:
}
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value) const override;
@@ -421,9 +420,8 @@ static bool shouldForceRelocation(const MCFixup &Fixup) {
}
void AArch64AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
if (shouldForceRelocation(Fixup))
IsResolved = false;
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
@@ -460,8 +458,8 @@ void AArch64AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// Shift the value into position.
Value <<= Info.TargetOffset;
- unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// Used to point to big endian bytes.
unsigned FulleSizeInBytes = getFixupKindContainereSizeInBytes(Fixup.getKind());
@@ -471,15 +469,16 @@ void AArch64AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
if (FulleSizeInBytes == 0) {
// Handle as little-endian
for (unsigned i = 0; i != NumBytes; ++i) {
- Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ Data[i] |= uint8_t((Value >> (i * 8)) & 0xff);
}
} else {
// Handle as big-endian
- assert((Offset + FulleSizeInBytes) <= Data.size() && "Invalid fixup size!");
+ assert(Fixup.getOffset() + FulleSizeInBytes <= F.getSize() &&
+ "Invalid fixup size!");
assert(NumBytes <= FulleSizeInBytes && "Invalid fixup size!");
for (unsigned i = 0; i != NumBytes; ++i) {
unsigned Idx = FulleSizeInBytes - 1 - i;
- Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
+ Data[Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
@@ -492,9 +491,9 @@ void AArch64AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// If the immediate is negative, generate MOVN else MOVZ.
// (Bit 30 = 0) ==> MOVN, (Bit 30 = 1) ==> MOVZ.
if (SignedValue < 0)
- Data[Offset + 3] &= ~(1 << 6);
+ Data[3] &= ~(1 << 6);
else
- Data[Offset + 3] |= (1 << 6);
+ Data[3] |= (1 << 6);
}
}
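With Data now pointing directly at the fixup location instead of the fragment start, the patch loops drop the Offset re-addition. A standalone sketch of the little-endian case, using a hypothetical helper name:

#include <cstdint>

// OR the resolved value into NumBytes little-endian bytes starting at Data,
// where Data is assumed to already point at the fixup's offset within the
// fragment's contents.
static void patchLittleEndian(uint8_t *Data, uint64_t Value, unsigned NumBytes) {
  for (unsigned i = 0; i != NumBytes; ++i)
    Data[i] |= uint8_t((Value >> (i * 8)) & 0xff);
}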
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 7618a57..45ac023 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -96,8 +96,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(const MCFixup &Fixup,
case AArch64::S_TPREL:
case AArch64::S_TLSDESC:
case AArch64::S_TLSDESC_AUTH:
- if (auto *SA = Target.getAddSym())
- cast<MCSymbolELF>(SA)->setType(ELF::STT_TLS);
+ if (auto *SA = const_cast<MCSymbol *>(Target.getAddSym()))
+ static_cast<MCSymbolELF *>(SA)->setType(ELF::STT_TLS);
break;
default:
break;
@@ -488,7 +488,8 @@ bool AArch64ELFObjectWriter::needsRelocateWithSymbol(const MCValue &Val,
// this global needs to be tagged. In addition, the linker needs to know
// whether to emit a special addend when relocating `end` symbols, and this
// can only be determined by the attributes of the symbol itself.
- if (Val.getAddSym() && cast<MCSymbolELF>(Val.getAddSym())->isMemtag())
+ if (Val.getAddSym() &&
+ static_cast<const MCSymbolELF *>(Val.getAddSym())->isMemtag())
return true;
if ((Val.getSpecifier() & AArch64::S_GOT) == AArch64::S_GOT)
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 6257e99..14547e3 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -418,7 +418,8 @@ private:
}
MCSymbol *emitMappingSymbol(StringRef Name) {
- auto *Symbol = cast<MCSymbolELF>(getContext().createLocalSymbol(Name));
+ auto *Symbol =
+ static_cast<MCSymbolELF *>(getContext().createLocalSymbol(Name));
emitLabel(Symbol);
return Symbol;
}
@@ -455,7 +456,7 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) {
void AArch64TargetELFStreamer::emitDirectiveVariantPCS(MCSymbol *Symbol) {
getStreamer().getAssembler().registerSymbol(*Symbol);
- cast<MCSymbolELF>(Symbol)->setOther(ELF::STO_AARCH64_VARIANT_PCS);
+ static_cast<MCSymbolELF *>(Symbol)->setOther(ELF::STO_AARCH64_VARIANT_PCS);
}
void AArch64TargetELFStreamer::finish() {
@@ -541,7 +542,7 @@ void AArch64TargetELFStreamer::finish() {
MCSectionELF *MemtagSec = nullptr;
for (const MCSymbol &Symbol : Asm.symbols()) {
- const auto &Sym = cast<MCSymbolELF>(Symbol);
+ auto &Sym = static_cast<const MCSymbolELF &>(Symbol);
if (Sym.isMemtag()) {
MemtagSec = Ctx.getELFSection(".memtag.globals.static",
ELF::SHT_AARCH64_MEMTAG_GLOBALS_STATIC, 0);
@@ -556,7 +557,7 @@ void AArch64TargetELFStreamer::finish() {
S.switchSection(MemtagSec);
const auto *Zero = MCConstantExpr::create(0, Ctx);
for (const MCSymbol &Symbol : Asm.symbols()) {
- const auto &Sym = cast<MCSymbolELF>(Symbol);
+ auto &Sym = static_cast<const MCSymbolELF &>(Symbol);
if (!Sym.isMemtag())
continue;
auto *SRE = MCSymbolRefExpr::create(&Sym, Ctx);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 8a0c4ac..18f3c47 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1160,6 +1160,12 @@ def FeatureTanhInsts : SubtargetFeature<"tanh-insts",
"Has v_tanh_f32/f16 instructions"
>;
+def FeatureTensorCvtLutInsts : SubtargetFeature<"tensor-cvt-lut-insts",
+ "HasTensorCvtLutInsts",
+ "true",
+ "Has v_perm_pk16* instructions"
+>;
+
def FeatureTransposeLoadF4F6Insts : SubtargetFeature<"transpose-load-f4f6-insts",
"HasTransposeLoadF4F6Insts",
"true",
@@ -2030,6 +2036,7 @@ def FeatureISAVersion12_50 : FeatureSet<
FeatureDPPSrc1SGPR,
FeatureBitOp3Insts,
FeatureTanhInsts,
+ FeatureTensorCvtLutInsts,
FeatureTransposeLoadF4F6Insts,
FeatureBF16TransInsts,
FeatureBF16ConversionInsts,
@@ -2785,6 +2792,9 @@ def HasBitOp3Insts : Predicate<"Subtarget->hasBitOp3Insts()">,
def HasTanhInsts : Predicate<"Subtarget->hasTanhInsts()">,
AssemblerPredicate<(all_of FeatureTanhInsts)>;
+def HasTensorCvtLutInsts : Predicate<"Subtarget->hasTensorCvtLutInsts()">,
+ AssemblerPredicate<(all_of FeatureTensorCvtLutInsts)>;
+
def HasTransposeLoadF4F6Insts : Predicate<"Subtarget->hasTransposeLoadF4F6Insts()">,
AssemblerPredicate<(all_of FeatureTransposeLoadF4F6Insts)>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 992572f..394a143 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,18 +51,6 @@ def gi_vop3pmodsdot :
GIComplexOperandMatcher<s32, "selectVOP3PModsDOT">,
GIComplexPatternEquiv<VOP3PModsDOT>;
-def gi_vop3pmodsneg :
- GIComplexOperandMatcher<s32, "selectVOP3PModsNeg">,
- GIComplexPatternEquiv<VOP3PModsNeg>;
-
-def gi_vop3pmodsnegs :
- GIComplexOperandMatcher<s32, "selectVOP3PModsNegs">,
- GIComplexPatternEquiv<VOP3PModsNegs>;
-
-def gi_dotiuvop3pmodsnegabs :
- GIComplexOperandMatcher<s32, "selectVOP3PModsNegAbs">,
- GIComplexPatternEquiv<VOP3PModsNegAbs>;
-
def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher<s32, "selectWMMAOpSelVOP3PMods">,
GIComplexPatternEquiv<WMMAOpSelVOP3PMods>;
@@ -452,6 +440,13 @@ def gi_fp_pow2_to_exponent : GICustomOperandRenderer<"renderFPPow2ToExponent">,
def gi_as_hw_round_mode : GICustomOperandRenderer<"renderRoundMode">,
GISDNodeXFormEquiv<as_hw_round_mode>;
+def gi_VOP3PModsNeg : GICustomOperandRenderer<"renderVOP3PModsNeg">,
+ GISDNodeXFormEquiv<VOP3PModsNeg>;
+def gi_VOP3PModsNegs : GICustomOperandRenderer<"renderVOP3PModsNegs">,
+ GISDNodeXFormEquiv<VOP3PModsNegs>;
+def gi_VOP3PModsNegAbs : GICustomOperandRenderer<"renderVOP3PModsNegAbs">,
+ GISDNodeXFormEquiv<VOP3PModsNegAbs>;
+
def gi_prefetch_loc : GICustomOperandRenderer<"renderPrefetchLoc">,
GISDNodeXFormEquiv<PrefetchLoc>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 39b4200..fb83388 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3449,63 +3449,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}
-// Select neg_lo from the i1 immediate operand.
-bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const {
- const ConstantSDNode *C = cast<ConstantSDNode>(In);
- // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
- // 1 promotes packed values to signed, 0 treats them as unsigned.
- assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
-
- unsigned Mods = SISrcMods::OP_SEL_1;
- unsigned SrcSign = C->getZExtValue();
- if (SrcSign == 1)
- Mods ^= SISrcMods::NEG;
-
- Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
- return true;
-}
-
-// Select both neg_lo and neg_hi from the i1 immediate operand. This is
-// specifically for F16/BF16 operands in WMMA instructions, where neg_lo applies
-// to matrix's even k elements, and neg_hi applies to matrix's odd k elements.
-bool AMDGPUDAGToDAGISel::SelectVOP3PModsNegs(SDValue In, SDValue &Src) const {
- const ConstantSDNode *C = cast<ConstantSDNode>(In);
- // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
- // 1 promotes packed values to signed, 0 treats them as unsigned.
- assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
-
- unsigned Mods = SISrcMods::OP_SEL_1;
- unsigned SrcSign = C->getZExtValue();
- if (SrcSign == 1)
- Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
-
- Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
- return true;
-}
-
-// Select neg, abs, or both neg and abs from the i16 immediate operans.
-bool AMDGPUDAGToDAGISel::SelectVOP3PModsNegAbs(SDValue In, SDValue &Src) const {
- const ConstantSDNode *C = cast<ConstantSDNode>(In);
- unsigned Mods = SISrcMods::OP_SEL_1;
- unsigned SrcMod = C->getZExtValue();
- switch (SrcMod) {
- default: // Any other value will be silently ignored (considered as 0).
- break;
- case 1:
- Mods ^= SISrcMods::NEG;
- break;
- case 2:
- Mods ^= SISrcMods::ABS;
- break;
- case 3:
- Mods ^= (SISrcMods::NEG | SISrcMods::ABS);
- break;
- }
-
- Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
- return true;
-}
-
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast<ConstantSDNode>(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 983f1aa..16388e7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -241,9 +241,6 @@ private:
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3PModsNeg(SDValue In, SDValue &Src) const;
- bool SelectVOP3PModsNegs(SDValue In, SDValue &Src) const;
- bool SelectVOP3PModsNegAbs(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
bool SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 31c4f62..7771f9b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -367,6 +367,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v4f64, MVT::v4bf16, Expand);
setTruncStoreAction(MVT::v4f64, MVT::v4f16, Expand);
+ setTruncStoreAction(MVT::v5i32, MVT::v5i1, Expand);
+ setTruncStoreAction(MVT::v5i32, MVT::v5i8, Expand);
+ setTruncStoreAction(MVT::v5i32, MVT::v5i16, Expand);
+
+ setTruncStoreAction(MVT::v6i32, MVT::v6i1, Expand);
+ setTruncStoreAction(MVT::v6i32, MVT::v6i8, Expand);
+ setTruncStoreAction(MVT::v6i32, MVT::v6i16, Expand);
+
+ setTruncStoreAction(MVT::v7i32, MVT::v7i1, Expand);
+ setTruncStoreAction(MVT::v7i32, MVT::v7i8, Expand);
+ setTruncStoreAction(MVT::v7i32, MVT::v7i16, Expand);
+
setTruncStoreAction(MVT::v8f64, MVT::v8f32, Expand);
setTruncStoreAction(MVT::v8f64, MVT::v8bf16, Expand);
setTruncStoreAction(MVT::v8f64, MVT::v8f16, Expand);
@@ -589,14 +601,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setSchedulingPreference(Sched::RegPressure);
setJumpIsExpensive(true);
- // FIXME: This is only partially true. If we have to do vector compares, any
- // SGPR pair can be a condition register. If we have a uniform condition, we
- // are better off doing SALU operations, where there is only one SCC. For now,
- // we don't have a way of knowing during instruction selection if a condition
- // will be uniform and we always use vector compares. Assume we are using
- // vector compares until that is fixed.
- setHasMultipleConditionRegisters(true);
-
setMinCmpXchgSizeInBits(32);
setSupportsUnalignedAtomics(false);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 39bb0ad..fd5d5b8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -388,6 +388,16 @@ public:
MVT getFenceOperandTy(const DataLayout &DL) const override {
return MVT::i32;
}
+
+ bool hasMultipleConditionRegisters(EVT VT) const override {
+ // FIXME: This is only partially true. If we have to do vector compares, any
+ // SGPR pair can be a condition register. If we have a uniform condition, we
+ // are better off doing SALU operations, where there is only one SCC. For
+ // now, we don't have a way of knowing during instruction selection if a
+ // condition will be uniform and we always use vector compares. Assume we
+ // are using vector compares until that is fixed.
+ return true;
+ }
};
namespace AMDGPUISD {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index f2207ff..4fe5d00 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1694,7 +1694,9 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
NewII->takeName(&II);
return IC.replaceInstUsesWith(II, NewII);
}
- case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4: {
+ case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
+ case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
+ case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
Value *Src0 = II.getArgOperand(1);
Value *Src1 = II.getArgOperand(3);
unsigned FmtA = cast<ConstantInt>(II.getArgOperand(0))->getZExtValue();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index b0d3b12..b7fd131 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4988,66 +4988,6 @@ AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
return selectVOP3PRetHelper(Root, true);
}
-// Select neg_lo from the i1 immediate operand.
-InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const {
- // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
- // Value is in Imm operand as i1 sign extended to int64_t.
- // 1(-1) promotes packed values to signed, 0 treats them as unsigned.
- assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
- "expected i1 value");
- unsigned Mods = SISrcMods::OP_SEL_1;
- if (Root.getImm() == -1)
- Mods ^= SISrcMods::NEG;
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
- }};
-}
-
-// Select both neg_lo and neg_hi from the i1 immediate operand. This is
-// specifically for F16/BF16 operands in WMMA instructions, where neg_lo applies
-// to matrix's even k elements, and neg_hi applies to matrix's odd k elements.
-InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectVOP3PModsNegs(MachineOperand &Root) const {
- // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
- // Value is in Imm operand as i1 sign extended to int64_t.
- // 1(-1) promotes packed values to signed, 0 treats them as unsigned.
- assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
- "expected i1 value");
- unsigned Mods = SISrcMods::OP_SEL_1;
- if (Root.getImm() == -1)
- Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
- }};
-}
-
-// Select neg, abs, or both neg and abs from the i16 immediate operans.
-InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectVOP3PModsNegAbs(MachineOperand &Root) const {
-
- assert(Root.isImm() && "Modifier for C must be an immediate");
-
- unsigned Mods = SISrcMods::OP_SEL_1;
- switch (Root.getImm()) {
- default: // Any other value will be silently ignored (considered as 0).
- break;
- case 1:
- Mods ^= SISrcMods::NEG;
- break;
- case 2:
- Mods ^= SISrcMods::ABS;
- break;
- case 3:
- Mods ^= (SISrcMods::NEG | SISrcMods::ABS);
- break;
- }
-
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
- }};
-}
-
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
MachineOperand &Root) const {
@@ -7102,6 +7042,38 @@ void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB,
MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4);
}
+void AMDGPUInstructionSelector::renderVOP3PModsNeg(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (MI.getOperand(OpIdx).getImm())
+ Mods ^= SISrcMods::NEG;
+ MIB.addImm((int64_t)Mods);
+}
+
+void AMDGPUInstructionSelector::renderVOP3PModsNegs(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (MI.getOperand(OpIdx).getImm())
+ Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
+ MIB.addImm((int64_t)Mods);
+}
+
+void AMDGPUInstructionSelector::renderVOP3PModsNegAbs(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ unsigned Val = MI.getOperand(OpIdx).getImm();
+ unsigned Mods = SISrcMods::OP_SEL_1; // default: none
+ if (Val == 1) // neg
+ Mods ^= SISrcMods::NEG;
+ if (Val == 2) // abs
+ Mods ^= SISrcMods::ABS;
+ if (Val == 3) // neg and abs
+ Mods ^= (SISrcMods::NEG | SISrcMods::ABS);
+ MIB.addImm((int64_t)Mods);
+}
+
void AMDGPUInstructionSelector::renderPrefetchLoc(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 140e753..c9da419 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -200,13 +200,6 @@ private:
selectVOP3PModsDOT(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
- selectVOP3PModsNeg(MachineOperand &Root) const;
- InstructionSelector::ComplexRendererFns
- selectVOP3PModsNegs(MachineOperand &Root) const;
- InstructionSelector::ComplexRendererFns
- selectVOP3PModsNegAbs(MachineOperand &Root) const;
-
- InstructionSelector::ComplexRendererFns
selectWMMAOpSelVOP3PMods(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
@@ -419,6 +412,13 @@ private:
void renderRoundMode(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
+ void renderVOP3PModsNeg(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+ void renderVOP3PModsNegs(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+ void renderVOP3PModsNegAbs(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+
void renderPrefetchLoc(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index d443f4e..2d8f259 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -236,7 +236,7 @@ cl::opt<LoweringKind> LoweringKindLoc(
"Lower via mixture of above strategies")));
template <typename T> std::vector<T> sortByName(std::vector<T> &&V) {
- llvm::sort(V.begin(), V.end(), [](const auto *L, const auto *R) {
+ llvm::sort(V, [](const auto *L, const auto *R) {
return L->getName() < R->getName();
});
return {std::move(V)};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 5aa0ebf..74230a5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4603,6 +4603,42 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_cvt_scale_pk8_f32_fp8:
case Intrinsic::amdgcn_cvt_scale_pk8_f32_bf8:
case Intrinsic::amdgcn_cvt_scale_pk8_f32_fp4:
+ case Intrinsic::amdgcn_cvt_scale_pk16_f16_fp6:
+ case Intrinsic::amdgcn_cvt_scale_pk16_bf16_fp6:
+ case Intrinsic::amdgcn_cvt_scale_pk16_f16_bf6:
+ case Intrinsic::amdgcn_cvt_scale_pk16_bf16_bf6:
+ case Intrinsic::amdgcn_cvt_scale_pk16_f32_fp6:
+ case Intrinsic::amdgcn_cvt_scale_pk16_f32_bf6:
+ case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_bf16:
+ case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_bf16:
+ case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_bf16:
+ case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_bf16:
+ case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_bf16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_bf16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_bf16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_bf16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_bf16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_bf16:
case Intrinsic::amdgcn_sat_pk4_i4_i8:
case Intrinsic::amdgcn_sat_pk4_u4_u8:
case Intrinsic::amdgcn_fmed3:
@@ -4762,6 +4798,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8:
case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
+ case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
+ case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
case Intrinsic::amdgcn_wmma_f32_32x16x128_f4:
case Intrinsic::amdgcn_swmmac_f16_16x16x64_f16:
case Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16:
@@ -4777,6 +4815,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8:
case Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8:
case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
+ case Intrinsic::amdgcn_perm_pk16_b4_u4:
+ case Intrinsic::amdgcn_perm_pk16_b6_u4:
+ case Intrinsic::amdgcn_perm_pk16_b8_u4:
return getDefaultMappingVOP(MI);
case Intrinsic::amdgcn_log:
case Intrinsic::amdgcn_exp2:
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index a83caa0..d33765d 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -178,6 +178,10 @@ public:
ImmTyBitOp3,
ImmTyMatrixAFMT,
ImmTyMatrixBFMT,
+ ImmTyMatrixAScale,
+ ImmTyMatrixBScale,
+ ImmTyMatrixAScaleFmt,
+ ImmTyMatrixBScaleFmt,
ImmTyMatrixAReuse,
ImmTyMatrixBReuse,
ImmTyScaleSel,
@@ -428,6 +432,10 @@ public:
bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
+ bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
+ bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
+ bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
+ bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
@@ -1183,6 +1191,10 @@ public:
case ImmTyBitOp3: OS << "BitOp3"; break;
case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
+ case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
+ case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
+ case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
+ case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
case ImmTyScaleSel: OS << "ScaleSel" ; break;
@@ -1728,6 +1740,14 @@ public:
AMDGPUOperand::ImmTy Type);
ParseStatus parseMatrixAFMT(OperandVector &Operands);
ParseStatus parseMatrixBFMT(OperandVector &Operands);
+ ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
+ AMDGPUOperand::ImmTy Type);
+ ParseStatus parseMatrixAScale(OperandVector &Operands);
+ ParseStatus parseMatrixBScale(OperandVector &Operands);
+ ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
+ AMDGPUOperand::ImmTy Type);
+ ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
+ ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
ParseStatus parseDfmtNfmt(int64_t &Format);
ParseStatus parseUfmt(int64_t &Format);
@@ -7356,6 +7376,42 @@ ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
AMDGPUOperand::ImmTyMatrixBFMT);
}
+ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
+ StringRef Name,
+ AMDGPUOperand::ImmTy Type) {
+ return parseStringOrIntWithPrefix(
+ Operands, Name, {"MATRIX_SCALE_ROW0", "MATRIX_SCALE_ROW1"}, Type);
+}
+
+ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
+ return tryParseMatrixScale(Operands, "matrix_a_scale",
+ AMDGPUOperand::ImmTyMatrixAScale);
+}
+
+ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
+ return tryParseMatrixScale(Operands, "matrix_b_scale",
+ AMDGPUOperand::ImmTyMatrixBScale);
+}
+
+ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
+ StringRef Name,
+ AMDGPUOperand::ImmTy Type) {
+ return parseStringOrIntWithPrefix(
+ Operands, Name,
+ {"MATRIX_SCALE_FMT_E8", "MATRIX_SCALE_FMT_E5M3", "MATRIX_SCALE_FMT_E4M3"},
+ Type);
+}
+
+ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
+ return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
+ AMDGPUOperand::ImmTyMatrixAScaleFmt);
+}
+
+ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
+ return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
+ AMDGPUOperand::ImmTyMatrixBScaleFmt);
+}
+
// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
@@ -9489,6 +9545,34 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
AMDGPUOperand::ImmTyMatrixBFMT, 0);
}
+ int MatrixAScaleIdx =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
+ if (MatrixAScaleIdx != -1) {
+ addOptionalImmOperand(Inst, Operands, OptIdx,
+ AMDGPUOperand::ImmTyMatrixAScale, 0);
+ }
+
+ int MatrixBScaleIdx =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
+ if (MatrixBScaleIdx != -1) {
+ addOptionalImmOperand(Inst, Operands, OptIdx,
+ AMDGPUOperand::ImmTyMatrixBScale, 0);
+ }
+
+ int MatrixAScaleFmtIdx =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
+ if (MatrixAScaleFmtIdx != -1) {
+ addOptionalImmOperand(Inst, Operands, OptIdx,
+ AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
+ }
+
+ int MatrixBScaleFmtIdx =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
+ if (MatrixBScaleFmtIdx != -1) {
+ addOptionalImmOperand(Inst, Operands, OptIdx,
+ AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
+ }
+
if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
addOptionalImmOperand(Inst, Operands, OptIdx,
AMDGPUOperand::ImmTyMatrixAReuse, 0);
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index ffe6b06..fef0d7e 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -598,6 +598,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
// encodings
+ if (isGFX1250() && Bytes.size() >= 16) {
+ DecoderUInt128 DecW = eat16Bytes(Bytes);
+ if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
+ break;
+ Bytes = Bytes_.slice(0, MaxInstBytesNum);
+ }
+
  if (isGFX11Plus() && Bytes.size() >= 12) {
DecoderUInt128 DecW = eat12Bytes(Bytes);
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 6fe3abc..c84ba1a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -236,6 +236,7 @@ protected:
bool Has64BitLiterals = false;
bool HasBitOp3Insts = false;
bool HasTanhInsts = false;
+ bool HasTensorCvtLutInsts = false;
bool HasTransposeLoadF4F6Insts = false;
bool HasPrngInst = false;
bool HasBVHDualAndBVH8Insts = false;
@@ -1411,6 +1412,8 @@ public:
bool hasTanhInsts() const { return HasTanhInsts; }
+ bool hasTensorCvtLutInsts() const { return HasTensorCvtLutInsts; }
+
bool hasAddPC64Inst() const { return GFX1250Insts; }
bool hasMinimum3Maximum3PKF16() const {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 86d56855..4e4660c 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -33,8 +33,7 @@ public:
AMDGPUAsmBackend(const Target &T) : MCAsmBackend(llvm::endianness::little) {}
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value) const override;
@@ -129,9 +128,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
void AMDGPUAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
if (Target.getSpecifier())
IsResolved = false;
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
@@ -148,13 +146,13 @@ void AMDGPUAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
Value <<= Info.TargetOffset;
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- uint32_t Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the bits from
// the fixup value.
for (unsigned i = 0; i != NumBytes; ++i)
- Data[Offset + i] |= static_cast<uint8_t>((Value >> (i * 8)) & 0xff);
+ Data[i] |= static_cast<uint8_t>((Value >> (i * 8)) & 0xff);
}
std::optional<MCFixupKind>
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 42c4d8b..ee8683a 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -1393,6 +1393,75 @@ void AMDGPUInstPrinter::printMatrixBFMT(const MCInst *MI, unsigned OpNo,
printMatrixFMT(MI, OpNo, STI, O, 'b');
}
+void AMDGPUInstPrinter::printMatrixScale(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O, char AorB) {
+ auto Imm = MI->getOperand(OpNo).getImm() & 1;
+ if (Imm == 0)
+ return;
+
+ O << " matrix_" << AorB << "_scale:";
+ switch (Imm) {
+ default:
+ O << Imm;
+ break;
+ case WMMA::MatrixScale::MATRIX_SCALE_ROW0:
+ O << "MATRIX_SCALE_ROW0";
+ break;
+ case WMMA::MatrixScale::MATRIX_SCALE_ROW1:
+ O << "MATRIX_SCALE_ROW1";
+ break;
+ }
+}
+
+void AMDGPUInstPrinter::printMatrixAScale(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printMatrixScale(MI, OpNo, STI, O, 'a');
+}
+
+void AMDGPUInstPrinter::printMatrixBScale(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printMatrixScale(MI, OpNo, STI, O, 'b');
+}
+
+void AMDGPUInstPrinter::printMatrixScaleFmt(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O, char AorB) {
+ auto Imm = MI->getOperand(OpNo).getImm() & 3;
+ if (Imm == 0)
+ return;
+
+ O << " matrix_" << AorB << "_scale_fmt:";
+ switch (Imm) {
+ default:
+ O << Imm;
+ break;
+ case WMMA::MatrixScaleFmt::MATRIX_SCALE_FMT_E8:
+ O << "MATRIX_SCALE_FMT_E8";
+ break;
+ case WMMA::MatrixScaleFmt::MATRIX_SCALE_FMT_E5M3:
+ O << "MATRIX_SCALE_FMT_E5M3";
+ break;
+ case WMMA::MatrixScaleFmt::MATRIX_SCALE_FMT_E4M3:
+ O << "MATRIX_SCALE_FMT_E4M3";
+ break;
+ }
+}
+
+void AMDGPUInstPrinter::printMatrixAScaleFmt(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printMatrixScaleFmt(MI, OpNo, STI, O, 'a');
+}
+
+void AMDGPUInstPrinter::printMatrixBScaleFmt(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printMatrixScaleFmt(MI, OpNo, STI, O, 'b');
+}
+
void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index f6739b14..be32061c 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -140,6 +140,19 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printMatrixBFMT(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMatrixScale(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O, char AorB);
+ void printMatrixAScale(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMatrixBScale(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMatrixScaleFmt(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O,
+ char AorB);
+ void printMatrixAScaleFmt(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMatrixBScaleFmt(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printInterpSlot(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printInterpAttr(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index ffdac8b..fa0c95f 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -75,8 +75,9 @@ unsigned AMDGPUMCAsmInfo::getMaxInstLength(const MCSubtargetInfo *STI) const {
if (STI->hasFeature(AMDGPU::FeatureNSAEncoding))
return 20;
- // VOP3PX encoding.
- if (STI->hasFeature(AMDGPU::FeatureGFX950Insts))
+ // VOP3PX/VOP3PX2 encoding.
+ if (STI->hasFeature(AMDGPU::FeatureGFX950Insts) ||
+ STI->hasFeature(AMDGPU::FeatureGFX1250Insts))
return 16;
// 64-bit instruction with 32-bit literal.
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 43ca548..68302f0 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -872,14 +872,14 @@ void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
unsigned Type) {
- MCSymbolELF *Symbol = cast<MCSymbolELF>(
+ auto *Symbol = static_cast<MCSymbolELF *>(
getStreamer().getContext().getOrCreateSymbol(SymbolName));
Symbol->setType(Type);
}
void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
Align Alignment) {
- MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol);
+ auto *SymbolELF = static_cast<MCSymbolELF *>(Symbol);
SymbolELF->setType(ELF::STT_OBJECT);
if (!SymbolELF->isBindingSet())
@@ -974,9 +974,9 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
auto &Streamer = getStreamer();
auto &Context = Streamer.getContext();
- MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
- Context.getOrCreateSymbol(Twine(KernelName)));
- MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
+ auto *KernelCodeSymbol =
+ static_cast<MCSymbolELF *>(Context.getOrCreateSymbol(Twine(KernelName)));
+ auto *KernelDescriptorSymbol = static_cast<MCSymbolELF *>(
Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
// Copy kernel descriptor symbol's binding, other and visibility from the
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index c564145..deadb7a 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -1018,6 +1018,17 @@ enum MatrixFMT : unsigned {
MATRIX_FMT_BF6 = 3,
MATRIX_FMT_FP4 = 4
};
+
+enum MatrixScale : unsigned {
+ MATRIX_SCALE_ROW0 = 0,
+ MATRIX_SCALE_ROW1 = 1,
+};
+
+enum MatrixScaleFmt : unsigned {
+ MATRIX_SCALE_FMT_E8 = 0,
+ MATRIX_SCALE_FMT_E5M3 = 1,
+ MATRIX_SCALE_FMT_E4M3 = 2
+};
} // namespace WMMA
namespace VOP3PEncoding {
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index e934152..0c653b1 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1169,11 +1169,18 @@ void SIFoldOperandsImpl::foldOperand(
// Grab the use operands first
SmallVector<MachineOperand *, 4> UsesToProcess(
llvm::make_pointer_range(MRI->use_nodbg_operands(RegSeqDstReg)));
- for (auto *RSUse : UsesToProcess) {
+ for (unsigned I = 0; I != UsesToProcess.size(); ++I) {
+ MachineOperand *RSUse = UsesToProcess[I];
MachineInstr *RSUseMI = RSUse->getParent();
unsigned OpNo = RSUseMI->getOperandNo(RSUse);
if (SplatRC) {
+ if (RSUseMI->isCopy()) {
+ Register DstReg = RSUseMI->getOperand(0).getReg();
+ append_range(UsesToProcess,
+ make_pointer_range(MRI->use_nodbg_operands(DstReg)));
+ continue;
+ }
if (tryFoldRegSeqSplat(RSUseMI, OpNo, SplatVal, SplatRC)) {
FoldableDef SplatDef(SplatVal, SplatRC);
appendFoldCandidate(FoldList, RSUseMI, OpNo, SplatDef);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index a3e20ba..c552f1a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -908,6 +908,32 @@ def SupportedRoundMode : TImmLeaf<i32, [{
Imm == (int)RoundingMode::TowardNegative;
}]>;
+def VOP3PModsNeg : SDNodeXForm<timm, [{
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (N->getZExtValue())
+ Mods ^= SISrcMods::NEG;
+ return CurDAG->getTargetConstant(Mods, SDLoc(N), MVT::i32);
+}]>;
+
+def VOP3PModsNegs : SDNodeXForm<timm, [{
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (N->getZExtValue())
+ Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
+ return CurDAG->getTargetConstant(Mods, SDLoc(N), MVT::i32);
+}]>;
+
+def VOP3PModsNegAbs : SDNodeXForm<timm, [{
+ unsigned Val = N->getZExtValue();
+ unsigned Mods = SISrcMods::OP_SEL_1; // default: none
+ if (Val == 1) // neg
+ Mods ^= SISrcMods::NEG;
+ if (Val == 2) // abs
+ Mods ^= SISrcMods::ABS;
+ if (Val == 3) // neg and abs
+ Mods ^= (SISrcMods::NEG | SISrcMods::ABS);
+ return CurDAG->getTargetConstant(Mods, SDLoc(N), MVT::i32);
+}]>;
+
class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
uint64_t Imm = N->getZExtValue();
unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
@@ -1310,6 +1336,12 @@ def bitop3_0 : DefaultOperand<BitOp3, 0>;
def MatrixAFMT : CustomOperand<i32, 1, "MatrixAFMT">;
def MatrixBFMT : CustomOperand<i32, 1, "MatrixBFMT">;
+def MatrixAScale : CustomOperand<i32, 1, "MatrixAScale">;
+def MatrixBScale : CustomOperand<i32, 1, "MatrixBScale">;
+
+def MatrixAScaleFmt : CustomOperand<i32, 1, "MatrixAScaleFmt">;
+def MatrixBScaleFmt : CustomOperand<i32, 1, "MatrixBScaleFmt">;
+
def MatrixAReuse : NamedBitOperand<"matrix_a_reuse">;
def MatrixBReuse : NamedBitOperand<"matrix_b_reuse">;
@@ -1647,9 +1679,6 @@ def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
-def VOP3PModsNeg : ComplexPattern<untyped, 1, "SelectVOP3PModsNeg">;
-def VOP3PModsNegs : ComplexPattern<untyped, 1, "SelectVOP3PModsNegs">; // chfang: not use complex pattern?
-def VOP3PModsNegAbs : ComplexPattern<untyped, 1, "SelectVOP3PModsNegAbs">;
def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;
def WMMAModsF32NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF32NegAbs">;
@@ -1774,6 +1803,7 @@ class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> {
!eq(VT.Size, 256) : VOPDstOperand<VReg_256>,
!eq(VT.Size, 192) : VOPDstOperand<VReg_192>,
!eq(VT.Size, 128) : VOPDstOperand<VReg_128>,
+ !eq(VT.Size, 96) : VOPDstOperand<VReg_96>,
!eq(VT.Size, 64) : VOPDstOperand<VReg_64>,
!eq(VT.Size, 32) : VOPDstOperand<VGPR_32>,
!eq(VT.Size, 16) : op16,
@@ -1924,6 +1954,7 @@ class getVOP3DPPSrcForVT<ValueType VT, bit IsFake16 = 1> {
!eq(VT, v2f16) : VCSrc_v2f16,
!eq(VT, v2bf16) : VCSrc_v2bf16,
!eq(VT, f32) : VCSrc_f32,
+ !eq(VT, v2i32) : VCSrc_v2b32,
1 : VCSrc_b32);
}
@@ -2678,6 +2709,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field bit HasNeg = HasModifiers;
field bit HasMatrixReuse = 0;
field bit HasMatrixFMT = 0;
+  field bit HasMatrixScale = 0;
field bit HasSrc0Mods = HasModifiers;
field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);
@@ -2935,6 +2968,9 @@ def VOP_V2BF16_F32_F32_I32 : VOPProfile <[v2bf16, f32, f32, i32]>;
def VOP_V2F16_F32_F32_I32 : VOPProfile <[v2f16, f32, f32, i32]>;
def VOP_V6I32_V32F16_F32 : VOPProfile<[v6i32, v32f16, f32, untyped]>;
def VOP_V6I32_V32BF16_F32 : VOPProfile<[v6i32, v32bf16, f32, untyped]>;
+def VOP_V3I32_V16F16_F32 : VOPProfile<[v3i32, v16f16, f32, untyped]>;
+def VOP_V3I32_V16BF16_F32 : VOPProfile<[v3i32, v16bf16, f32, untyped]>;
+def VOP_V3I32_V16F32_F32 : VOPProfile<[v3i32, v16f32, f32, untyped]>;
def VOP_V6I32_V16F32_V16F32_F32 : VOPProfile<[v6i32, v16f32, v16f32, f32]>;
def VOP_V2F16_I32_F32 : VOPProfile<[v2f16, i32, f32, untyped]>;
def VOP_V2I16_F32_F32_F32 : VOPProfile<[v2i16, f32, f32, f32]>;
@@ -2948,6 +2984,8 @@ def VOP_BF16_F32_I32 : VOPProfile<[bf16, f32, i32, untyped]>;
def VOP_F16_F32_I32 : VOPProfile<[f16, f32, i32, untyped]>;
def VOP_I32_BF16_I32_F32 : VOPProfile<[i32, bf16, i32, f32]>;
def VOP_I32_F16_I32_F32 : VOPProfile<[i32, f16, i32, f32]>;
+def VOP_V16F16_V3I32_I32 : VOPProfile<[v16f16, v3i32, i32, untyped]>;
+def VOP_V16BF16_V3I32_I32 : VOPProfile<[v16bf16, v3i32, i32, untyped]>;
def VOP_V8F16_V2I32_I32 : VOPProfile<[v8f16, v2i32, i32, untyped]>;
def VOP_V8BF16_V2I32_I32 : VOPProfile<[v8bf16, v2i32, i32, untyped]>;
def VOP_V8F16_I32_I32 : VOPProfile<[v8f16, i32, i32, untyped]>;
@@ -2955,11 +2993,26 @@ def VOP_V8BF16_I32_I32 : VOPProfile<[v8bf16, i32, i32, untyped]>;
def VOP_V16F32_V3I32_I32 : VOPProfile<[v16f32, v3i32, i32, untyped]>;
def VOP_V8F32_V2I32_I32 : VOPProfile<[v8f32, v2i32, i32, untyped]>;
def VOP_V8F32_I32_I32 : VOPProfile<[v8f32, i32, i32, untyped]>;
+def VOP_V2I32_V8BF16_F32 : VOPProfile<[v2i32, v8bf16, f32, untyped]>;
+def VOP_V2I32_V8F16_F32 : VOPProfile<[v2i32, v8f16, f32, untyped]>;
+def VOP_V2I32_V8F32_F32 : VOPProfile<[v2i32, v8f32, f32, untyped]>;
+def VOP_I32_V8F32_F32 : VOPProfile<[i32, v8f32, f32, untyped]>;
+def VOP_I32_V8F16_F32 : VOPProfile<[i32, v8f16, f32, untyped]>;
+def VOP_I32_V8BF16_F32 : VOPProfile<[i32, v8bf16, f32, untyped]>;
def VOP_I32_F32_I32_F32 : VOPProfile<[i32, f32, i32, f32]>;
def VOP_V6I32_V32BF16_I32_F32 : VOPProfile<[v6i32, v32bf16, i32, f32]>;
def VOP_V6I32_V32F16_I32_F32 : VOPProfile<[v6i32, v32f16, i32, f32]>;
def VOP_V6I32_V32F32_I32_F32 : VOPProfile<[v6i32, v32f32, i32, f32]>;
+def VOP_V3I32_V16F16_I32_F32 : VOPProfile<[v3i32, v16f16, i32, f32]>;
+def VOP_V3I32_V16BF16_I32_F32 : VOPProfile<[v3i32, v16bf16, i32, f32]>;
+def VOP_V3I32_V16F32_I32_F32 : VOPProfile<[v3i32, v16f32, i32, f32]>;
+def VOP_V2I32_V8BF16_I32_F32 : VOPProfile<[v2i32, v8bf16, i32, f32]>;
+def VOP_V2I32_V8F16_I32_F32 : VOPProfile<[v2i32, v8f16, i32, f32]>;
+def VOP_V2I32_V8F32_I32_F32 : VOPProfile<[v2i32, v8f32, i32, f32]>;
+def VOP_I32_V8F32_I32_F32 : VOPProfile<[i32, v8f32, i32, f32]>;
+def VOP_I32_V8F16_I32_F32 : VOPProfile<[i32, v8f16, i32, f32]>;
+def VOP_I32_V8BF16_I32_F32 : VOPProfile<[i32, v8bf16, i32, f32]>;
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 54fa192..bd5dfa9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3543,14 +3543,21 @@ def : GCNPat <
(vecTy (UniformBinFrag<build_vector> (Ty undef), (Ty SReg_32:$src1))),
(S_LSHL_B32 SReg_32:$src1, (i32 16))
>;
-}
def : GCNPat <
(vecTy (DivergentBinFrag<build_vector> (Ty undef), (Ty VGPR_32:$src1))),
(vecTy (V_LSHLREV_B32_e64 (i32 16), VGPR_32:$src1))
>;
+} // End True16Predicate = ...
} // End foreach Ty = ...
-}
+} // End AddedComplexity = 1
+
+let True16Predicate = UseRealTrue16Insts in
+def : GCNPat <
+ (v2i16 (DivergentBinFrag<build_vector> (i16 undef), (i16 (trunc i32:$src1)))),
+ (REG_SEQUENCE VGPR_32, (i16 (IMPLICIT_DEF)), lo16,
+ (i16 (EXTRACT_SUBREG VGPR_32:$src1, lo16)), hi16)
+>;
let SubtargetPredicate = HasVOP3PInsts in {
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
@@ -3599,7 +3606,11 @@ def : GCNPat <
>;
def : GCNPat <
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_16:$src0), (Ty undef))),
- (REG_SEQUENCE VGPR_32, $src0, lo16, (IMPLICIT_DEF), hi16)
+ (REG_SEQUENCE VGPR_32, $src0, lo16, (Ty (IMPLICIT_DEF)), hi16)
+>;
+def : GCNPat <
+ (vecTy (DivergentBinFrag<build_vector> (Ty undef), (Ty VGPR_16:$src1))),
+ (REG_SEQUENCE VGPR_32, (Ty (IMPLICIT_DEF)), lo16, (Ty VGPR_16:$src1), hi16)
>;
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 36d1a3b..08d07c9 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1302,6 +1302,7 @@ def VCSrc_f64 : SrcRegOrImm9 <VS_64, "OPERAND_REG_INLINE_C_FP64">;
def VCSrc_v2b16 : SrcRegOrImm9 <VS_32, "OPERAND_REG_INLINE_C_V2INT16">;
def VCSrc_v2bf16: SrcRegOrImm9 <VS_32, "OPERAND_REG_INLINE_C_V2BF16">;
def VCSrc_v2f16 : SrcRegOrImm9 <VS_32, "OPERAND_REG_INLINE_C_V2FP16">;
+def VCSrc_v2b32 : SrcRegOrImm9 <VS_64, "OPERAND_REG_INLINE_C_V2INT32">;
// True 16 Operands
def VCSrcT_b16 : SrcRegOrImm9_t16 <"OPERAND_REG_INLINE_C_INT16">;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index f621f85..b128207 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -107,18 +107,6 @@ class VOP1_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
VOP_DPP_Pseudo <OpName, P, pattern> {
}
-class getVOP1Pat <SDPatternOperator node, VOPProfile P> : LetDummies {
- list<dag> ret =
- !if(P.HasModifiers,
- [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods P.Src0VT:$src0, i32:$src0_modifiers))))],
- !if(P.HasOMod,
- [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
- i1:$clamp, i32:$omod))))],
- [(set P.DstVT:$vdst, (node (P.Src0VT P.Src0RC32:$src0)))]
- )
- );
-}
-
multiclass VOP1Inst <string opName, VOPProfile P,
SDPatternOperator node = null_frag, int VOPDOp = -1> {
// We only want to set this on the basic, non-SDWA or DPP forms.
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 19ce7f5..f4b6af6 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -1726,6 +1726,12 @@ multiclass VOP3CvtScaleSelInst<string OpName, VOPProfile P, SDPatternOperator no
}
}
+let HasExtVOP3DPP = 0, HasModifiers = 0 in {
+def VOP3_V2I32_I32_I32_V2I32 : VOP3_Profile<VOPProfile<[v2i32, i32, i32, v2i32]>>;
+def VOP3_V3I32_I32_I64_V2I32 : VOP3_Profile<VOPProfile<[v3i32, i32, i64, v2i32]>>;
+def VOP3_V4I32_I64_I64_V2I32 : VOP3_Profile<VOPProfile<[v4i32, i64, i64, v2i32]>>;
+}
+
let Src0RC64 = VSrc_NoInline_v2f16 in {
def VOP3_CVT_PK_F8_F16_Profile : VOP3_Profile<VOP_I16_V2F16>;
def VOP3_CVT_PK_F8_F16_True16_Profile : VOP3_Profile_True16<VOP3_CVT_PK_F8_F16_Profile>;
@@ -1771,6 +1777,12 @@ let SubtargetPredicate = isGFX1250Plus in {
defm V_CVT_SCALE_PK8_BF16_BF8 : VOP3CvtScaleSelInst<"v_cvt_scale_pk8_bf16_bf8", VOP_V8BF16_V2I32_I32, int_amdgcn_cvt_scale_pk8_bf16_bf8>;
defm V_CVT_SCALE_PK8_F32_FP8 : VOP3CvtScaleSelInst<"v_cvt_scale_pk8_f32_fp8", VOP_V8F32_V2I32_I32, int_amdgcn_cvt_scale_pk8_f32_fp8>;
defm V_CVT_SCALE_PK8_F32_BF8 : VOP3CvtScaleSelInst<"v_cvt_scale_pk8_f32_bf8", VOP_V8F32_V2I32_I32, int_amdgcn_cvt_scale_pk8_f32_bf8>;
+ defm V_CVT_SCALE_PK16_F16_FP6 : VOP3CvtScaleSelInst<"v_cvt_scale_pk16_f16_fp6", VOP_V16F16_V3I32_I32, int_amdgcn_cvt_scale_pk16_f16_fp6>;
+ defm V_CVT_SCALE_PK16_BF16_FP6 : VOP3CvtScaleSelInst<"v_cvt_scale_pk16_bf16_fp6", VOP_V16BF16_V3I32_I32, int_amdgcn_cvt_scale_pk16_bf16_fp6>;
+ defm V_CVT_SCALE_PK16_F16_BF6 : VOP3CvtScaleSelInst<"v_cvt_scale_pk16_f16_bf6", VOP_V16F16_V3I32_I32, int_amdgcn_cvt_scale_pk16_f16_bf6>;
+ defm V_CVT_SCALE_PK16_BF16_BF6 : VOP3CvtScaleSelInst<"v_cvt_scale_pk16_bf16_bf6", VOP_V16BF16_V3I32_I32, int_amdgcn_cvt_scale_pk16_bf16_bf6>;
+ defm V_CVT_SCALE_PK16_F32_FP6 : VOP3CvtScaleSelInst<"v_cvt_scale_pk16_f32_fp6", VOP_V16F32_V3I32_I32, int_amdgcn_cvt_scale_pk16_f32_fp6>;
+ defm V_CVT_SCALE_PK16_F32_BF6 : VOP3CvtScaleSelInst<"v_cvt_scale_pk16_f32_bf6", VOP_V16F32_V3I32_I32, int_amdgcn_cvt_scale_pk16_f32_bf6>;
} // End Constraints = "@earlyclobber $vdst"
defm V_CVT_SCALE_PK8_F16_FP4 : VOP3CvtScaleSelInst<"v_cvt_scale_pk8_f16_fp4", VOP_V8F16_I32_I32, int_amdgcn_cvt_scale_pk8_f16_fp4>;
@@ -1778,6 +1790,44 @@ let SubtargetPredicate = isGFX1250Plus in {
defm V_CVT_SCALE_PK8_F32_FP4 : VOP3CvtScaleSelInst<"v_cvt_scale_pk8_f32_fp4", VOP_V8F32_I32_I32, int_amdgcn_cvt_scale_pk8_f32_fp4>;
} // End ReadsModeReg = 0
+ let Constraints = "@earlyclobber $vdst" in {
+ let WaveSizePredicate = isWave32 in {
+ defm V_CVT_SCALEF32_PK8_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_pk8_fp8_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8BF16_F32>, int_amdgcn_cvt_scalef32_pk8_fp8_bf16>;
+ defm V_CVT_SCALEF32_PK8_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_pk8_bf8_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8BF16_F32>, int_amdgcn_cvt_scalef32_pk8_bf8_bf16>;
+ defm V_CVT_SCALEF32_PK8_FP8_F16 : VOP3Inst<"v_cvt_scalef32_pk8_fp8_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F16_F32>, int_amdgcn_cvt_scalef32_pk8_fp8_f16>;
+ defm V_CVT_SCALEF32_PK8_BF8_F16 : VOP3Inst<"v_cvt_scalef32_pk8_bf8_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F16_F32>, int_amdgcn_cvt_scalef32_pk8_bf8_f16>;
+ defm V_CVT_SCALEF32_PK8_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk8_fp8_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F32_F32>, int_amdgcn_cvt_scalef32_pk8_fp8_f32>;
+ defm V_CVT_SCALEF32_PK8_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk8_bf8_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F32_F32>, int_amdgcn_cvt_scalef32_pk8_bf8_f32>;
+ defm V_CVT_SCALEF32_PK8_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk8_fp4_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_I32_V8F32_F32>, int_amdgcn_cvt_scalef32_pk8_fp4_f32>;
+ defm V_CVT_SCALEF32_PK8_FP4_F16 : VOP3Inst<"v_cvt_scalef32_pk8_fp4_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_I32_V8F16_F32>, int_amdgcn_cvt_scalef32_pk8_fp4_f16>;
+ defm V_CVT_SCALEF32_PK8_FP4_BF16 : VOP3Inst<"v_cvt_scalef32_pk8_fp4_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_I32_V8BF16_F32>, int_amdgcn_cvt_scalef32_pk8_fp4_bf16>;
+ } // End WaveSizePredicate = isWave32
+ defm V_CVT_SCALEF32_PK16_FP6_F32 : VOP3Inst<"v_cvt_scalef32_pk16_fp6_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16F32_F32>, int_amdgcn_cvt_scalef32_pk16_fp6_f32>;
+ defm V_CVT_SCALEF32_PK16_BF6_F32 : VOP3Inst<"v_cvt_scalef32_pk16_bf6_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16F32_F32>, int_amdgcn_cvt_scalef32_pk16_bf6_f32>;
+ defm V_CVT_SCALEF32_PK16_FP6_F16 : VOP3Inst<"v_cvt_scalef32_pk16_fp6_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16F16_F32>, int_amdgcn_cvt_scalef32_pk16_fp6_f16>;
+ defm V_CVT_SCALEF32_PK16_BF6_F16 : VOP3Inst<"v_cvt_scalef32_pk16_bf6_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16F16_F32>, int_amdgcn_cvt_scalef32_pk16_bf6_f16>;
+ defm V_CVT_SCALEF32_PK16_FP6_BF16 : VOP3Inst<"v_cvt_scalef32_pk16_fp6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16BF16_F32>, int_amdgcn_cvt_scalef32_pk16_fp6_bf16>;
+ defm V_CVT_SCALEF32_PK16_BF6_BF16 : VOP3Inst<"v_cvt_scalef32_pk16_bf6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16BF16_F32>, int_amdgcn_cvt_scalef32_pk16_bf6_bf16>;
+
+ let WaveSizePredicate = isWave32 in {
+ defm V_CVT_SCALEF32_SR_PK8_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_sr_pk8_fp8_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8BF16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk8_fp8_bf16>;
+ defm V_CVT_SCALEF32_SR_PK8_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_sr_pk8_bf8_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8BF16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk8_bf8_bf16>;
+ defm V_CVT_SCALEF32_SR_PK8_FP8_F16 : VOP3Inst<"v_cvt_scalef32_sr_pk8_fp8_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk8_fp8_f16>;
+ defm V_CVT_SCALEF32_SR_PK8_BF8_F16 : VOP3Inst<"v_cvt_scalef32_sr_pk8_bf8_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk8_bf8_f16>;
+ defm V_CVT_SCALEF32_SR_PK8_FP8_F32 : VOP3Inst<"v_cvt_scalef32_sr_pk8_fp8_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F32_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk8_fp8_f32>;
+ defm V_CVT_SCALEF32_SR_PK8_BF8_F32 : VOP3Inst<"v_cvt_scalef32_sr_pk8_bf8_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F32_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk8_bf8_f32>;
+ defm V_CVT_SCALEF32_SR_PK8_FP4_F32 : VOP3Inst<"v_cvt_scalef32_sr_pk8_fp4_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_I32_V8F32_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk8_fp4_f32>;
+ defm V_CVT_SCALEF32_SR_PK8_FP4_F16 : VOP3Inst<"v_cvt_scalef32_sr_pk8_fp4_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_I32_V8F16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk8_fp4_f16>;
+ defm V_CVT_SCALEF32_SR_PK8_FP4_BF16 : VOP3Inst<"v_cvt_scalef32_sr_pk8_fp4_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_I32_V8BF16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk8_fp4_bf16>;
+ } // End WaveSizePredicate = isWave32
+ defm V_CVT_SCALEF32_SR_PK16_BF6_BF16 : VOP3Inst<"v_cvt_scalef32_sr_pk16_bf6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16BF16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk16_bf6_bf16>;
+ defm V_CVT_SCALEF32_SR_PK16_BF6_F16 : VOP3Inst<"v_cvt_scalef32_sr_pk16_bf6_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16F16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk16_bf6_f16>;
+ defm V_CVT_SCALEF32_SR_PK16_BF6_F32 : VOP3Inst<"v_cvt_scalef32_sr_pk16_bf6_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16F32_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk16_bf6_f32>;
+ defm V_CVT_SCALEF32_SR_PK16_FP6_BF16 : VOP3Inst<"v_cvt_scalef32_sr_pk16_fp6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16BF16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk16_fp6_bf16>;
+ defm V_CVT_SCALEF32_SR_PK16_FP6_F16 : VOP3Inst<"v_cvt_scalef32_sr_pk16_fp6_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16F16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk16_fp6_f16>;
+ defm V_CVT_SCALEF32_SR_PK16_FP6_F32 : VOP3Inst<"v_cvt_scalef32_sr_pk16_fp6_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16F32_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk16_fp6_f32>;
+ } // End Constraints = "@earlyclobber $vdst"
+
let True16Predicate = UseRealTrue16Insts in {
def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_fp8_f16, V_CVT_SR_FP8_F16_t16_e64, f16>;
def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_bf8_f16, V_CVT_SR_BF8_F16_t16_e64, f16>;
@@ -1788,6 +1838,12 @@ let SubtargetPredicate = isGFX1250Plus in {
}
} // End SubtargetPredicate = isGFX1250Plus
+let SubtargetPredicate = HasTensorCvtLutInsts in {
+ defm V_PERM_PK16_B4_U4 : VOP3Inst<"v_perm_pk16_b4_u4", VOP3_V2I32_I32_I32_V2I32, int_amdgcn_perm_pk16_b4_u4>;
+ defm V_PERM_PK16_B6_U4 : VOP3Inst<"v_perm_pk16_b6_u4", VOP3_V3I32_I32_I64_V2I32, int_amdgcn_perm_pk16_b6_u4>;
+ defm V_PERM_PK16_B8_U4 : VOP3Inst<"v_perm_pk16_b8_u4", VOP3_V4I32_I64_I64_V2I32, int_amdgcn_perm_pk16_b8_u4>;
+} // End SubtargetPredicate = HasTensorCvtLutInsts
+
class Cvt_Scale_Sr_F32ToBF16F16_Pat<SDPatternOperator node, VOP3_Pseudo inst, ValueType DstTy> : GCNPat<
(DstTy (node DstTy:$vdst_in, f32:$src0, i32:$src1, timm:$word_sel)),
(inst (DstSelToOpSelXForm $word_sel), $src0, 0, $src1, VGPR_32:$vdst_in)
@@ -2186,6 +2242,9 @@ let AssemblerPredicate = isGFX11Plus in {
}
// These instructions differ from GFX12 variant by supporting DPP:
+defm V_PERM_PK16_B4_U4 : VOP3Only_Real_Base_gfx1250<0x23f>;
+defm V_PERM_PK16_B6_U4 : VOP3Only_Real_Base_gfx1250<0x242>;
+defm V_PERM_PK16_B8_U4 : VOP3Only_Real_Base_gfx1250<0x243>;
defm V_LSHL_ADD_U64 : VOP3Only_Realtriple_gfx1250<0x252>;
defm V_ASHR_PK_I8_I32 : VOP3Only_Realtriple_gfx1250<0x290>;
defm V_ASHR_PK_U8_I32 : VOP3Only_Realtriple_gfx1250<0x291>;
@@ -2198,6 +2257,42 @@ defm V_CVT_SCALE_PK8_F32_FP8 : VOP3Only_ScaleSel_Real_gfx1250<0x2aa>;
defm V_CVT_SCALE_PK8_F16_BF8 : VOP3Only_ScaleSel_Real_gfx1250<0x2ab>;
defm V_CVT_SCALE_PK8_BF16_BF8 : VOP3Only_ScaleSel_Real_gfx1250<0x2ac>;
defm V_CVT_SCALE_PK8_F32_BF8 : VOP3Only_ScaleSel_Real_gfx1250<0x2ad>;
+defm V_CVT_SCALEF32_PK8_FP4_F32 : VOP3Only_Real_Base_gfx1250<0x2b0>;
+defm V_CVT_SCALEF32_PK8_FP4_F16 : VOP3Only_Real_Base_gfx1250<0x2b3>;
+defm V_CVT_SCALEF32_PK8_FP8_BF16 : VOP3Only_Real_Base_gfx1250<0x2b4>;
+defm V_CVT_SCALEF32_PK8_BF8_BF16 : VOP3Only_Real_Base_gfx1250<0x2b5>;
+defm V_CVT_SCALEF32_PK8_FP4_BF16 : VOP3Only_Real_Base_gfx1250<0x2b8>;
+defm V_CVT_SCALEF32_PK8_FP8_F32 : VOP3Only_Real_Base_gfx1250<0x2c3>;
+defm V_CVT_SCALEF32_PK8_FP8_F16 : VOP3Only_Real_Base_gfx1250<0x2c4>;
+defm V_CVT_SCALEF32_PK8_BF8_F32 : VOP3Only_Real_Base_gfx1250<0x2c5>;
+defm V_CVT_SCALEF32_PK8_BF8_F16 : VOP3Only_Real_Base_gfx1250<0x2c6>;
+defm V_CVT_SCALE_PK16_F16_FP6 : VOP3Only_ScaleSel_Real_gfx1250<0x2c7>;
+defm V_CVT_SCALE_PK16_BF16_FP6 : VOP3Only_ScaleSel_Real_gfx1250<0x2c8>;
+defm V_CVT_SCALE_PK16_F32_FP6 : VOP3Only_ScaleSel_Real_gfx1250<0x2c9>;
+defm V_CVT_SCALE_PK16_F16_BF6 : VOP3Only_ScaleSel_Real_gfx1250<0x2ca>;
+defm V_CVT_SCALE_PK16_BF16_BF6 : VOP3Only_ScaleSel_Real_gfx1250<0x2cb>;
+defm V_CVT_SCALE_PK16_F32_BF6 : VOP3Only_ScaleSel_Real_gfx1250<0x2cc>;
+defm V_CVT_SCALEF32_PK16_FP6_F32 : VOP3Only_Real_Base_gfx1250<0x2cd>;
+defm V_CVT_SCALEF32_PK16_BF6_F32 : VOP3Only_Real_Base_gfx1250<0x2ce>;
+defm V_CVT_SCALEF32_PK16_FP6_F16 : VOP3Only_Real_Base_gfx1250<0x2cf>;
+defm V_CVT_SCALEF32_PK16_BF6_F16 : VOP3Only_Real_Base_gfx1250<0x2d0>;
+defm V_CVT_SCALEF32_PK16_FP6_BF16 : VOP3Only_Real_Base_gfx1250<0x2d1>;
+defm V_CVT_SCALEF32_PK16_BF6_BF16 : VOP3Only_Real_Base_gfx1250<0x2d2>;
+defm V_CVT_SCALEF32_SR_PK16_FP6_F32 : VOP3Only_Real_Base_gfx1250<0x2d3>;
+defm V_CVT_SCALEF32_SR_PK16_BF6_F32 : VOP3Only_Real_Base_gfx1250<0x2d4>;
+defm V_CVT_SCALEF32_SR_PK16_FP6_F16 : VOP3Only_Real_Base_gfx1250<0x2d5>;
+defm V_CVT_SCALEF32_SR_PK16_BF6_F16 : VOP3Only_Real_Base_gfx1250<0x2d6>;
+defm V_CVT_SCALEF32_SR_PK16_FP6_BF16 : VOP3Only_Real_Base_gfx1250<0x2d7>;
+defm V_CVT_SCALEF32_SR_PK16_BF6_BF16 : VOP3Only_Real_Base_gfx1250<0x2d8>;
+defm V_CVT_SCALEF32_SR_PK8_FP4_F32 : VOP3Only_Real_Base_gfx1250<0x297>;
+defm V_CVT_SCALEF32_SR_PK8_FP8_F32 : VOP3Only_Real_Base_gfx1250<0x298>;
+defm V_CVT_SCALEF32_SR_PK8_BF8_F32 : VOP3Only_Real_Base_gfx1250<0x299>;
+defm V_CVT_SCALEF32_SR_PK8_FP4_F16 : VOP3Only_Real_Base_gfx1250<0x2b9>;
+defm V_CVT_SCALEF32_SR_PK8_FP4_BF16 : VOP3Only_Real_Base_gfx1250<0x2bc>;
+defm V_CVT_SCALEF32_SR_PK8_FP8_F16 : VOP3Only_Real_Base_gfx1250<0x2bf>;
+defm V_CVT_SCALEF32_SR_PK8_FP8_BF16 : VOP3Only_Real_Base_gfx1250<0x2c0>;
+defm V_CVT_SCALEF32_SR_PK8_BF8_F16 : VOP3Only_Real_Base_gfx1250<0x2c1>;
+defm V_CVT_SCALEF32_SR_PK8_BF8_BF16 : VOP3Only_Real_Base_gfx1250<0x2c2>;
defm V_CVT_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36d>;
defm V_CVT_SR_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36e>;
defm V_CVT_PK_F16_F32 : VOP3Only_Realtriple_gfx1250<0x36f>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 95fcd4a..9264935 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -557,11 +557,11 @@ multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
null_frag, 1>;
// Dot-iu instructions consider the input as signed if the imod neg bits are set.
// Dot-iu intrinsics therefore have extra operands and require a separate codegen pattern.
- def : GCNPat < (intrinsic_node (VOP3PModsNeg i32:$src0_mods), i32:$src0,
- (VOP3PModsNeg i32:$src1_mods), i32:$src1,
+ def : GCNPat < (intrinsic_node timm:$src0_mods, i32:$src0,
+ timm:$src1_mods, i32:$src1,
i32:$src2, (i1 timm:$clamp)),
- (!cast<Instruction>(NAME) $src0_mods, i32:$src0,
- $src1_mods, i32:$src1,
+ (!cast<Instruction>(NAME) (VOP3PModsNeg $src0_mods), i32:$src0,
+ (VOP3PModsNeg $src1_mods), i32:$src1,
(i32 8), i32:$src2, i1:$clamp)
>;
}
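
The pattern change here (and in the WMMA profiles later in this file) moves VOP3PModsNeg from the matched input to the emitted output: the source pattern now matches the raw timm modifier operand, and the transform is applied while building the selected instruction. For orientation, a standalone sketch of what a neg-bit-to-src_modifiers transform of this kind does; the constants are placeholders, not the real SISrcMods values:

```cpp
#include <cassert>
#include <cstdint>

// Placeholder modifier bits; the real values live in the AMDGPU backend and
// are an assumption here, not copied from the source.
constexpr uint32_t MOD_NEG = 1u << 0;
constexpr uint32_t MOD_OP_SEL_1 = 1u << 3;

// Mimics a VOP3PModsNeg-style transform: the intrinsic passes 0/1 per operand
// ("treat as signed"), and the selected instruction wants that folded into its
// src_modifiers immediate.
static uint32_t modsFromNegBit(uint32_t NegBit) {
  uint32_t Mods = MOD_OP_SEL_1; // default hi-half selection
  if (NegBit)
    Mods |= MOD_NEG;
  return Mods;
}

int main() {
  assert(modsFromNegBit(0) == MOD_OP_SEL_1);
  assert((modsFromNegBit(1) & MOD_NEG) != 0);
  return 0;
}
```

With the transform on the output side, the pattern matches whatever immediate the intrinsic carries and only the selected instruction sees the encoded modifier.
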
@@ -1302,11 +1302,11 @@ class WMMAOpSelPat<Instruction Inst, SDPatternOperator node, VOPProfile P> :
class WMMAUIClampPat<Instruction Inst, SDPatternOperator node, VOPProfile P> :
GCNPat < (P.DstVT (node
- (VOP3PModsNeg i32:$src0_modifiers), (P.Src0VT P.Src0VT:$src0),
- (VOP3PModsNeg i32:$src1_modifiers), (P.Src1VT P.Src1VT:$src1),
+ timm:$src0_modifiers, (P.Src0VT P.Src0VT:$src0),
+ timm:$src1_modifiers, (P.Src1VT P.Src1VT:$src1),
(P.Src2VT P.Src2VT:$src2), (i1 timm:$clamp)
)),
- (P.DstVT (Inst i32:$src0_modifiers, P.Src0VT:$src0, i32:$src1_modifiers, P.Src1VT:$src1, (i32 8), P.Src2VT:$src2, i1:$clamp))
+ (P.DstVT (Inst (VOP3PModsNeg $src0_modifiers), P.Src0VT:$src0, (VOP3PModsNeg $src1_modifiers), P.Src1VT:$src1, (i32 8), P.Src2VT:$src2, i1:$clamp))
>;
class WMMAOpcodeMapping<Instruction TwoAddr, Instruction ThreeAddr> {
@@ -1407,9 +1407,9 @@ let WaveSizePredicate = isWave64 in {
}
class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
- bit _IsIU, bit _IsFP8BF8XF32, bit _Has_ImodOp = 0,
- bit _HasMatrixFMT = 0, bit _HasMatrixReuse = 0,
- bit _IsF4 = 0>
+ bit _IsIU, bit _IsFP8BF8XF32, bit _Has_ImodOp = 0,
+ bit _HasMatrixFMT = 0, bit _HasMatrixScale = 0,
+ bit _Scale16 = 0, bit _HasMatrixReuse = 0, bit _IsF4 = 0>
: VOP3P_Profile<VOPProfile<ArgTy>> {
bit IsIU = _IsIU;
bit NoABMods = !or(_IsFP8BF8XF32, _IsF4); // No IMOD support for A and B
@@ -1417,6 +1417,8 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
int IndexType = _IndexType;
let HasMatrixFMT = _HasMatrixFMT;
+ let HasMatrixScale = _HasMatrixScale;
+ bit Scale16 = _Scale16;
let HasMatrixReuse = _HasMatrixReuse;
bit HasIModOp = _Has_ImodOp;
@@ -1455,6 +1457,7 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
IsC_F16: "_f16",
IsC_BF16: "_bf16",
1: "_b32")));
+ ValueType ScaleTy = !if(Scale16, i64, i32);
// For f16 and bf16 matrices A and B, each element can be modified by
// fneg(neg_lo,neg_hi = 1). For f32 and f64, neg_lo[0:1] is allowed, but
@@ -1516,6 +1519,13 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
!eq(IndexType, 32): (ins IndexKey32bit:$index_key_32bit));
dag MatrixFMT = !if(HasMatrixFMT, (ins MatrixAFMT:$matrix_a_fmt, MatrixBFMT:$matrix_b_fmt),
(ins));
+ dag MatrixScaleSrc = !if(HasMatrixScale,
+ !if(Scale16, (ins VCSrc_b64:$scale_src0, VCSrc_b64:$scale_src1),
+ (ins VCSrc_b32:$scale_src0, VCSrc_b32:$scale_src1)),
+ (ins));
+ dag MatrixScale = !if(HasMatrixScale, (ins MatrixAScale:$matrix_a_scale, MatrixBScale:$matrix_b_scale,
+ MatrixAScaleFmt:$matrix_a_scale_fmt, MatrixBScaleFmt:$matrix_b_scale_fmt),
+ (ins));
dag MatrixReuse = !if(HasMatrixReuse, (ins MatrixAReuse:$matrix_a_reuse, MatrixBReuse:$matrix_b_reuse), (ins));
dag Clamp = !if(HasClamp, (ins Clamp0:$clamp), (ins));
dag Neg = !cond(!and(NegLoAny, NegHiAny) : (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi),
@@ -1529,7 +1539,7 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
(ins VRegSrc_64:$src2),
(ins VRegSrc_32:$src2)),
IndexKey)),
- MatrixFMT, MatrixReuse, Clamp, Neg);
+ MatrixScaleSrc, MatrixFMT, MatrixScale, MatrixReuse, Clamp, Neg);
// asm
@@ -1538,57 +1548,59 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
!eq(IndexType, 16) : "$index_key_16bit",
!eq(IndexType, 32) : "$index_key_32bit");
string MatrxFMTAsm = !if(HasMatrixFMT, "$matrix_a_fmt$matrix_b_fmt", "");
+ string MatrixScaleSrcAsm = !if(HasMatrixScale, ", $scale_src0, $scale_src1", "");
+ string MatrixScaleAsm = !if(HasMatrixScale, "$matrix_a_scale$matrix_b_scale$matrix_a_scale_fmt$matrix_b_scale_fmt", "");
string MatrixReuseAsm = !if(HasMatrixReuse, "$matrix_a_reuse$matrix_b_reuse", "");
string ClampAsm = !if(HasClamp, "$clamp", "");
string NegAsm = !cond(!and(NegLoAny, NegHiAny) : "$neg_lo$neg_hi",
!and(NegLoAny, !not(NegHiAny)) : "$neg_lo",
!and(!not(NegLoAny), !not(NegHiAny)) : "");
- let AsmVOP3P = "$vdst, $src0, $src1, $src2"#IndexKeyAsm#MatrxFMTAsm#MatrixReuseAsm#NegAsm#ClampAsm;
+ let AsmVOP3P = "$vdst, $src0, $src1, $src2"#IndexKeyAsm#MatrixScaleSrcAsm#MatrxFMTAsm#MatrixScaleAsm#MatrixReuseAsm#NegAsm#ClampAsm;
// isel patterns
bit IsAB_BF16_IMod0 = !and(IsAB_BF16, !not(HasIModOp));
bit IsAB_F16_IMod0 = !and(IsAB_F16, !not(HasIModOp));
bit IsAB_F32F64_IMod1 = !and(!or(IsAB_F64, IsAB_F32), HasIModOp);
bit IsAB_F16BF16_IMod1 = !and(!or(IsAB_F16, IsAB_BF16), HasIModOp);
- dag Src0InPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0),
- IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs i32:$src0_modifiers), Src0VT:$src0),
+ dag Src0InPat = !cond(IsAB_F32F64_IMod1 : (ins timm:$src0_modifiers, Src0VT:$src0),
+ IsAB_F16BF16_IMod1 : (ins timm:$src0_modifiers, Src0VT:$src0),
IsAB_F16_IMod0 : (ins (Src0VT (WMMAModsF16Neg Src0VT:$src0, i32:$src0_modifiers))),
IsAB_BF16_IMod0 : (ins Src0VT:$src0),
- IsIU : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0),
+ IsIU : (ins timm:$src0_modifiers, Src0VT:$src0),
HasMatrixFMT : (ins timm:$matrix_a_fmt, Src0VT:$src0),
NoABMods : (ins Src0VT:$src0));
- dag Src0OutPat = !cond(IsAB_F32F64_IMod1 : (ins i32:$src0_modifiers, Src0VT:$src0),
- IsAB_F16BF16_IMod1 : (ins i32:$src0_modifiers, Src0VT:$src0),
+ dag Src0OutPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg $src0_modifiers), Src0VT:$src0),
+ IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs $src0_modifiers), Src0VT:$src0),
IsAB_F16_IMod0 : (ins i32:$src0_modifiers, Src0VT:$src0),
IsAB_BF16_IMod0 : (ins (i32 8), Src0VT:$src0),
- IsIU : (ins i32:$src0_modifiers, Src0VT:$src0),
+ IsIU : (ins (VOP3PModsNeg $src0_modifiers), Src0VT:$src0),
NoABMods : (ins Src0VT:$src0));
- dag Src1InPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1),
- IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs i32:$src1_modifiers), Src1VT:$src1),
+ dag Src1InPat = !cond(IsAB_F32F64_IMod1 : (ins timm:$src1_modifiers, Src1VT:$src1),
+ IsAB_F16BF16_IMod1 : (ins timm:$src1_modifiers, Src1VT:$src1),
IsAB_F16_IMod0 : (ins (Src1VT (WMMAModsF16Neg Src1VT:$src1, i32:$src1_modifiers))),
IsAB_BF16_IMod0 : (ins Src1VT:$src1),
- IsIU : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1),
+ IsIU : (ins timm:$src1_modifiers, Src1VT:$src1),
HasMatrixFMT : (ins timm:$matrix_b_fmt, Src1VT:$src1),
NoABMods : (ins Src1VT:$src1));
- dag Src1OutPat = !cond(IsAB_F32F64_IMod1 : (ins i32:$src1_modifiers, Src1VT:$src1),
- IsAB_F16BF16_IMod1 : (ins i32:$src1_modifiers, Src1VT:$src1),
+ dag Src1OutPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg $src1_modifiers), Src1VT:$src1),
+ IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs $src1_modifiers), Src1VT:$src1),
IsAB_F16_IMod0 : (ins i32:$src1_modifiers, Src1VT:$src1),
IsAB_BF16_IMod0 : (ins (i32 8), Src1VT:$src1),
- IsIU : (ins i32:$src1_modifiers, Src1VT:$src1),
+ IsIU : (ins (VOP3PModsNeg $src1_modifiers), Src1VT:$src1),
NoABMods : (ins Src1VT:$src1));
bit IsC_IMod1 = !and(HasIModOp, IsWMMA, !not(IsIU), !not(IsXF32));
bit IsC_F32_IMod0 = !and(IsC_F32, !not(HasIModOp));
bit IsC_F16_IMod0 = !and(IsC_F16, !not(HasIModOp));
bit IsC_BF16_IMod0 = !and(IsC_BF16, !not(HasIModOp));
bit IsIUXF32 = !or(IsIU, IsXF32);
- dag Src2InPatWmma = !cond(IsC_IMod1 : (ins (VOP3PModsNegAbs i32:$src2_modifiers), Src2VT:$src2),
+ dag Src2InPatWmma = !cond(IsC_IMod1 : (ins timm:$src2_modifiers, Src2VT:$src2),
IsC_F32_IMod0 : (ins (Src2VT (WMMAModsF32NegAbs Src2VT:$src2, i32:$src2_modifiers))),
IsC_F16_IMod0 : (ins (Src2VT (WMMAModsF16NegAbs Src2VT:$src2, i32:$src2_modifiers))),
IsC_BF16_IMod0 : (ins Src2VT:$src2),
IsIUXF32 : (ins Src2VT:$src2),
IsSWMMAC : (ins));
- dag Src2OutPatWmma = !cond(IsC_IMod1 : (ins i32:$src2_modifiers, Src2VT:$src2),
+ dag Src2OutPatWmma = !cond(IsC_IMod1 : (ins (VOP3PModsNegAbs $src2_modifiers), Src2VT:$src2),
IsC_F32_IMod0 : (ins i32:$src2_modifiers, Src2VT:$src2),
IsC_F16_IMod0 : (ins i32:$src2_modifiers, Src2VT:$src2),
IsC_BF16_IMod0 : (ins (i32 8), Src2VT:$src2),
@@ -1604,22 +1616,29 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
!eq(IndexType, 16): (ins i32:$src2, i32:$index_key_16bit),
!eq(IndexType, 32): (ins i64:$src2, i32:$index_key_32bit));
dag MatrixFMTOutPat = !if(HasMatrixFMT, (ins i32:$matrix_a_fmt, i32:$matrix_b_fmt), (ins));
- dag Src2InlineInPat = !con(!if(IsC_IMod1, (ins (VOP3PModsNegAbs i32:$src2_modifiers)), (ins)), (ins (Src2VT (WMMAVISrc Src2VT:$src2))));
- dag Src2InlineOutPat = !con(!if(IsIUXF32, (ins), !if(IsC_IMod1, (ins i32:$src2_modifiers), (ins (i32 8)))), (ins Src2VT:$src2));
+ dag Src2InlineInPat = !con(!if(IsC_IMod1, (ins timm:$src2_modifiers), (ins)), (ins (Src2VT (WMMAVISrc Src2VT:$src2))));
+ dag Src2InlineOutPat = !con(!if(IsIUXF32, (ins), !if(IsC_IMod1, (ins (VOP3PModsNegAbs $src2_modifiers)), (ins (i32 8)))), (ins Src2VT:$src2));
+ dag MatrixScaleInPat = !if(HasMatrixScale, (ins timm:$matrix_a_scale, timm:$matrix_a_scale_fmt, ScaleTy:$scale_src0,
+ timm:$matrix_b_scale, timm:$matrix_b_scale_fmt, ScaleTy:$scale_src1),
+ (ins));
dag MatrixReuseInPat = !if(HasMatrixReuse, (ins timm:$matrix_a_reuse, timm:$matrix_b_reuse), (ins));
+ dag MatrixScaleOutSrcPat = !if(HasMatrixScale, (ins ScaleTy:$scale_src0, ScaleTy:$scale_src1), (ins));
+ dag MatrixScaleOutModPat = !if(HasMatrixScale, (ins i32:$matrix_a_scale, i32:$matrix_b_scale, i32:$matrix_a_scale_fmt, i32:$matrix_b_scale_fmt), (ins));
dag MatrixReuseOutModPat = !if(HasMatrixReuse, (ins i1:$matrix_a_reuse, i1:$matrix_b_reuse), (ins));
- dag WmmaInPat = !con(Src0InPat, Src1InPat, Src2InPatWmma, MatrixReuseInPat, ClampPat);
- dag WmmaOutPat = !con(Src0OutPat, Src1OutPat, Src2OutPatWmma, MatrixFMTOutPat, MatrixReuseOutModPat, ClampPat);
+ dag WmmaInPat = !con(Src0InPat, Src1InPat, Src2InPatWmma, MatrixScaleInPat, MatrixReuseInPat, ClampPat);
+ dag WmmaOutPat = !con(Src0OutPat, Src1OutPat, Src2OutPatWmma, MatrixScaleOutSrcPat, MatrixFMTOutPat,
+ MatrixScaleOutModPat, MatrixReuseOutModPat, ClampPat);
dag SwmmacInPat = !con(Src0InPat, Src1InPat, (ins Src2VT:$srcTiedDef), IndexInPat, MatrixReuseInPat, ClampPat);
dag SwmmacOutPat = !con(Src0OutPat, Src1OutPat, (ins Src2VT:$srcTiedDef), IndexOutPat, MatrixReuseOutModPat, ClampPat);
// wmma pattern where src2 is inline imm uses _threeaddr pseudo,
// can't use _twoaddr since it would violate src2 tied to vdst constraint.
- dag WmmaInlineInPat = !con(Src0InPat, Src1InPat, Src2InlineInPat, MatrixReuseInPat, ClampPat);
- dag WmmaInlineOutPat = !con(Src0OutPat, Src1OutPat, Src2InlineOutPat, MatrixFMTOutPat, MatrixReuseOutModPat, ClampPat);
+ dag WmmaInlineInPat = !con(Src0InPat, Src1InPat, Src2InlineInPat, MatrixScaleInPat, MatrixReuseInPat, ClampPat);
+ dag WmmaInlineOutPat = !con(Src0OutPat, Src1OutPat, Src2InlineOutPat, MatrixScaleOutSrcPat,
+ MatrixFMTOutPat, MatrixScaleOutModPat, MatrixReuseOutModPat, ClampPat);
}
def WMMAInstInfoTable : GenericTable {
@@ -1645,11 +1664,15 @@ multiclass WMMAInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string Pse
let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = 1 in
def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>, WMMAInstInfo {
let PseudoInstr = Instr#PseudoInstrSuffix;
+ let FixedSize = WMMAProfile.HasMatrixScale;
+ let Size = !if(WMMAProfile.HasMatrixScale, 16, 8);
}
let Constraints = WMMAConstraints3Addr, SchedRW = [Write32Bit, Write32Bit] in
def _threeaddr : VOP3P_Pseudo<Instr, WMMAProfile>, WMMAInstInfo {
let PseudoInstr = Instr#PseudoInstrSuffix;
+ let FixedSize = WMMAProfile.HasMatrixScale;
+ let Size = !if(WMMAProfile.HasMatrixScale, 16, 8);
}
}
@@ -1728,39 +1751,53 @@ def F32_FP8BF8_SWMMAC_w64 : VOP3PWMMA_Profile<[v4f32, i32, v2i32, v4f32], 1,
// *** IU4X32_SWMMAC_w64: for matrix A, lanes 0-31 hold 8xi4 and the remaining
//     lanes are ignored; the index is i16. Matrix B uses all lanes.
-def F32_F32_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v2f32, v2f32, v8f32], 0, 0, 0, 0, 1, 0, 1>;
-def F32_BF16X32_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v16bf16, v16bf16, v8f32], 0, 0, 0, 0, 1, 0, 1>;
-def F32_F16X32_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v16f16, v16f16, v8f32], 0, 0, 0, 0, 1, 0, 1>;
-def F16_F16X32_WMMA_w32 : VOP3PWMMA_Profile<[v8f16, v16f16, v16f16, v8f16], 0, 0, 0, 0, 1, 0, 1>;
-def BF16_BF16X32_WMMA_w32 : VOP3PWMMA_Profile<[v8bf16, v16bf16, v16bf16, v8bf16], 0, 0, 0, 0, 1, 0, 1>;
-def BF16F32_BF16_WMMA_w32 : VOP3PWMMA_Profile<[v8bf16, v16bf16, v16bf16, v8f32], 0, 0, 0, 0, 1, 0, 1>;
-def F32_FP8BF8X64_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v8i32, v8i32, v8f32], 0, 0, 0, 1, 1, 0, 1>;
-def F32_FP8BF8X128_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v16i32, v16i32, v8f32], 0, 0, 0, 1, 1, 0, 1>;
-def F16_FP8BF8X64_WMMA_w32 : VOP3PWMMA_Profile<[v8f16, v8i32, v8i32, v8f16], 0, 0, 0, 1, 1, 0, 1>;
-def F16_FP8BF8X128_WMMA_w32 : VOP3PWMMA_Profile<[v8f16, v16i32, v16i32, v8f16], 0, 0, 0, 1, 1, 0, 1>;
-def F32_32X16X128_F4_WMMA_w32 : VOP3PWMMA_Profile<[v16f32, v16i32, v8i32, v16f32], 0, 0, 0, 0, 1, 0, 0, 1>;
-def I32_IU8X64_WMMA_w32 : VOP3PWMMA_Profile<[v8i32, v8i32, v8i32, v8i32], 0, 0, 1, 0, 1, 0, 1>;
-def F32_F16X64_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f32, v16f16, v32f16, v8f32], 1, 16, 0, 0, 1, 0, 1>;
-def F32_BF16X64_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f32, v16bf16, v32bf16, v8f32], 1, 16, 0, 0, 1, 0, 1>;
-def F16_F16X64_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f16, v16f16, v32f16, v8f16], 1, 16, 0, 0, 1, 0, 1>;
-def BF16_BF16X64_SWMMAC_w32 : VOP3PWMMA_Profile<[v8bf16, v16bf16, v32bf16, v8bf16], 1, 16, 0, 0, 1, 0, 1>;
-def F32_FP8BF8X128_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f32, v8i32, v16i32, v8f32], 1, 32, 0, 1, 1, 0, 1>;
-def F16_FP8BF8X128_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f16, v8i32, v16i32, v8f16], 1, 32, 0, 1, 1, 0, 1>;
-def I32_IU8X128_SWMMAC_w32 : VOP3PWMMA_Profile<[v8i32, v8i32, v16i32, v8i32], 1, 32, 1, 0, 1, 0, 1>;
-
-multiclass WMMA_F8F6F4_Profiles<bit HasMatrixReuse> {
- def _f8_f8_w32 : VOP3PWMMA_Profile<[v8f32, v16i32, v16i32, v8f32], 0, 0, 0, 1, 1, 1, HasMatrixReuse>;
- def _f8_f6_w32 : VOP3PWMMA_Profile<[v8f32, v16i32, v12i32, v8f32], 0, 0, 0, 1, 1, 1, HasMatrixReuse>;
- def _f8_f4_w32 : VOP3PWMMA_Profile<[v8f32, v16i32, v8i32, v8f32], 0, 0, 0, 1, 1, 1, HasMatrixReuse>;
- def _f6_f8_w32 : VOP3PWMMA_Profile<[v8f32, v12i32, v16i32, v8f32], 0, 0, 0, 1, 1, 1, HasMatrixReuse>;
- def _f6_f6_w32 : VOP3PWMMA_Profile<[v8f32, v12i32, v12i32, v8f32], 0, 0, 0, 1, 1, 1, HasMatrixReuse>;
- def _f6_f4_w32 : VOP3PWMMA_Profile<[v8f32, v12i32, v8i32, v8f32], 0, 0, 0, 1, 1, 1, HasMatrixReuse>;
- def _f4_f8_w32 : VOP3PWMMA_Profile<[v8f32, v8i32, v16i32, v8f32], 0, 0, 0, 1, 1, 1, HasMatrixReuse>;
- def _f4_f6_w32 : VOP3PWMMA_Profile<[v8f32, v8i32, v12i32, v8f32], 0, 0, 0, 1, 1, 1, HasMatrixReuse>;
- def _f4_f4_w32 : VOP3PWMMA_Profile<[v8f32, v8i32, v8i32, v8f32], 0, 0, 0, 1, 1, 1, HasMatrixReuse>;
-}
-
-defm F32_16X16X128_F8F6F4 : WMMA_F8F6F4_Profiles<0>;
+def F32_F32_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v2f32, v2f32, v8f32], 0, 0, 0, 0, 1, 0, 0, 0, 1>;
+def F32_BF16X32_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v16bf16, v16bf16, v8f32], 0, 0, 0, 0, 1, 0, 0, 0, 1>;
+def F32_F16X32_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v16f16, v16f16, v8f32], 0, 0, 0, 0, 1, 0, 0, 0, 1>;
+def F16_F16X32_WMMA_w32 : VOP3PWMMA_Profile<[v8f16, v16f16, v16f16, v8f16], 0, 0, 0, 0, 1, 0, 0, 0, 1>;
+def BF16_BF16X32_WMMA_w32 : VOP3PWMMA_Profile<[v8bf16, v16bf16, v16bf16, v8bf16], 0, 0, 0, 0, 1, 0, 0, 0, 1>;
+def BF16F32_BF16_WMMA_w32 : VOP3PWMMA_Profile<[v8bf16, v16bf16, v16bf16, v8f32], 0, 0, 0, 0, 1, 0, 0, 0, 1>;
+def F32_FP8BF8X64_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v8i32, v8i32, v8f32], 0, 0, 0, 1, 1, 0, 0, 0, 1>;
+def F32_FP8BF8X128_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v16i32, v16i32, v8f32], 0, 0, 0, 1, 1, 0, 0, 0, 1>;
+def F16_FP8BF8X64_WMMA_w32 : VOP3PWMMA_Profile<[v8f16, v8i32, v8i32, v8f16], 0, 0, 0, 1, 1, 0, 0, 0, 1>;
+def F16_FP8BF8X128_WMMA_w32 : VOP3PWMMA_Profile<[v8f16, v16i32, v16i32, v8f16], 0, 0, 0, 1, 1, 0, 0, 0, 1>;
+def F32_32X16X128_F4_WMMA_w32 : VOP3PWMMA_Profile<[v16f32, v16i32, v8i32, v16f32], 0, 0, 0, 0, 1, 0, 0, 0, 0, 1>;
+def I32_IU8X64_WMMA_w32 : VOP3PWMMA_Profile<[v8i32, v8i32, v8i32, v8i32], 0, 0, 1, 0, 1, 0, 0, 0, 1>;
+def F32_F16X64_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f32, v16f16, v32f16, v8f32], 1, 16, 0, 0, 1, 0, 0, 0, 1>;
+def F32_BF16X64_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f32, v16bf16, v32bf16, v8f32], 1, 16, 0, 0, 1, 0, 0, 0, 1>;
+def F16_F16X64_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f16, v16f16, v32f16, v8f16], 1, 16, 0, 0, 1, 0, 0, 0, 1>;
+def BF16_BF16X64_SWMMAC_w32 : VOP3PWMMA_Profile<[v8bf16, v16bf16, v32bf16, v8bf16], 1, 16, 0, 0, 1, 0, 0, 0, 1>;
+def F32_FP8BF8X128_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f32, v8i32, v16i32, v8f32], 1, 32, 0, 1, 1, 0, 0, 0, 1>;
+def F16_FP8BF8X128_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f16, v8i32, v16i32, v8f16], 1, 32, 0, 1, 1, 0, 0, 0, 1>;
+def I32_IU8X128_SWMMAC_w32 : VOP3PWMMA_Profile<[v8i32, v8i32, v16i32, v8i32], 1, 32, 1, 0, 1, 0, 0, 0, 1>;
+
+multiclass WMMA_F8F6F4_Profiles<bit HasMatrixScale, bit Scale16, bit HasMatrixReuse> {
+ def _f8_f8_w32 : VOP3PWMMA_Profile<[v8f32, v16i32, v16i32, v8f32], 0, 0, 0, 1, 1, 1, HasMatrixScale, Scale16, HasMatrixReuse>;
+ def _f8_f6_w32 : VOP3PWMMA_Profile<[v8f32, v16i32, v12i32, v8f32], 0, 0, 0, 1, 1, 1, HasMatrixScale, Scale16, HasMatrixReuse>;
+ def _f8_f4_w32 : VOP3PWMMA_Profile<[v8f32, v16i32, v8i32, v8f32], 0, 0, 0, 1, 1, 1, HasMatrixScale, Scale16, HasMatrixReuse>;
+ def _f6_f8_w32 : VOP3PWMMA_Profile<[v8f32, v12i32, v16i32, v8f32], 0, 0, 0, 1, 1, 1, HasMatrixScale, Scale16, HasMatrixReuse>;
+ def _f6_f6_w32 : VOP3PWMMA_Profile<[v8f32, v12i32, v12i32, v8f32], 0, 0, 0, 1, 1, 1, HasMatrixScale, Scale16, HasMatrixReuse>;
+ def _f6_f4_w32 : VOP3PWMMA_Profile<[v8f32, v12i32, v8i32, v8f32], 0, 0, 0, 1, 1, 1, HasMatrixScale, Scale16, HasMatrixReuse>;
+ def _f4_f8_w32 : VOP3PWMMA_Profile<[v8f32, v8i32, v16i32, v8f32], 0, 0, 0, 1, 1, 1, HasMatrixScale, Scale16, HasMatrixReuse>;
+ def _f4_f6_w32 : VOP3PWMMA_Profile<[v8f32, v8i32, v12i32, v8f32], 0, 0, 0, 1, 1, 1, HasMatrixScale, Scale16, HasMatrixReuse>;
+ def _f4_f4_w32 : VOP3PWMMA_Profile<[v8f32, v8i32, v8i32, v8f32], 0, 0, 0, 1, 1, 1, HasMatrixScale, Scale16, HasMatrixReuse>;
+}
+
+defm F32_16X16X128_F8F6F4 : WMMA_F8F6F4_Profiles<0, 0, 0>;
+defm F32_16X16X128_F8F6F4_SCALE : WMMA_F8F6F4_Profiles<1, 0, 1>;
+defm F32_16X16X128_F8F6F4_SCALE16 : WMMA_F8F6F4_Profiles<1, 1, 1>;
+
+class VOP_WMMA_LD_SCALE<ValueType vt, RegisterOperand RC> : VOP3P_Profile<VOPProfile<[untyped, vt, vt, untyped]>> {
+ let HasMatrixScale = 1;
+ let HasMatrixReuse = 1;
+ let HasNeg = 0;
+ let Src0RC64 = RC;
+ let Src1RC64 = RC;
+ let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, MatrixAScale:$matrix_a_scale, MatrixBScale:$matrix_b_scale,
+ MatrixAScaleFmt:$matrix_a_scale_fmt, MatrixBScaleFmt:$matrix_b_scale_fmt,
+ MatrixAReuse:$matrix_a_reuse, MatrixBReuse:$matrix_b_reuse);
+ let AsmVOP3P = " $src0, $src1$matrix_a_scale$matrix_b_scale$matrix_a_scale_fmt$matrix_b_scale_fmt$matrix_a_reuse$matrix_b_reuse";
+}
multiclass WMMAInst_SrcFormats_mc<string OpName, string Profile> {
foreach I = ["f8_f8", "f8_f6", "f8_f4", "f6_f8", "f6_f6", "f6_f4", "f4_f8", "f4_f6", "f4_f4"] in {
@@ -1813,9 +1850,12 @@ defm V_SWMMAC_F32_16X16X64_F16_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x64
defm V_SWMMAC_F16_16X16X64_F16_w32 : SWMMACInstGFX12<"v_swmmac_f16_16x16x64_f16", F16_F16X64_SWMMAC_w32, "_w32">;
defm V_WMMA_F32_16X16X128_F8F6F4 : WMMAInst_SrcFormats_mc<"v_wmma_f32_16x16x128_f8f6f4", "F32_16X16X128_F8F6F4">;
-
+defm V_WMMA_SCALE_F32_16X16X128_F8F6F4 : WMMAInst_SrcFormats_mc<"v_wmma_scale_f32_16x16x128_f8f6f4", "F32_16X16X128_F8F6F4_SCALE">;
+defm V_WMMA_SCALE16_F32_16X16X128_F8F6F4 : WMMAInst_SrcFormats_mc<"v_wmma_scale16_f32_16x16x128_f8f6f4", "F32_16X16X128_F8F6F4_SCALE16">;
} // End is_wmma_xdl = 1.
+defm V_WMMA_LD_SCALE_PAIRED_B32 : VOP3PInst<"v_wmma_ld_scale_paired_b32", VOP_WMMA_LD_SCALE<i32, VCSrc_b32>>;
+defm V_WMMA_LD_SCALE16_PAIRED_B64 : VOP3PInst<"v_wmma_ld_scale16_paired_b64", VOP_WMMA_LD_SCALE<i64, VCSrc_b64>>;
} // End SubtargetPredicate = isGFX125xOnly
} // End WaveSizePredicate = isWave32
@@ -1973,6 +2013,8 @@ let SubtargetPredicate = isGFX125xOnly in {
foreach I = ["f8_f8", "f8_f6", "f8_f4", "f6_f8", "f6_f6", "f6_f4", "f4_f8", "f4_f6", "f4_f4"] in {
defm : WMMAPat<"V_WMMA_F32_16X16X128_F8F6F4_" # I # "_w32", int_amdgcn_wmma_f32_16x16x128_f8f6f4, !cast<VOP3PWMMA_Profile>("F32_16X16X128_F8F6F4_" # I # "_w32")>;
+ defm : WMMAPat<"V_WMMA_SCALE_F32_16X16X128_F8F6F4_" # I # "_w32", int_amdgcn_wmma_scale_f32_16x16x128_f8f6f4, !cast<VOP3PWMMA_Profile>("F32_16X16X128_F8F6F4_SCALE_" # I # "_w32")>;
+ defm : WMMAPat<"V_WMMA_SCALE16_F32_16X16X128_F8F6F4_" # I # "_w32", int_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4, !cast<VOP3PWMMA_Profile>("F32_16X16X128_F8F6F4_SCALE16_" # I # "_w32")>;
}
def : SWMMACPat<V_SWMMAC_F32_16X16X64_BF16_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x64_bf16, F32_BF16X64_SWMMAC_w32>;
@@ -2105,6 +2147,73 @@ multiclass VOP3P_Real_WMMA_gfx1250_SrcFormats<bits<8> op, string WMMAP> {
}
}
+class VOP3PX2e <bits<8> op, bits<8> LdScaleOp, VOP3PWMMA_Profile P> : Enc128, VOP3Pe_Base {
+ bits<9> scale_src0;
+ bits<9> scale_src1;
+
+ // Inst{7-0} = unused
+ let Inst{10-8} = {0, matrix_b_scale_fmt{1-0}}; // neg_hi
+ let Inst{11} = matrix_a_scale{0}; // scale_op_sel(0)
+ let Inst{12} = 0; // scale_op_sel(1)
+ let Inst{13} = matrix_a_reuse; // scale_op_sel(2)
+ let Inst{14} = matrix_b_reuse; // scale_op_sel_hi(2)
+ let Inst{15} = 0; // scale_clamp
+ let Inst{31-24} = 0xcc; // Encoding
+ let Inst{23-16} = LdScaleOp;
+ let Inst{40-32} = scale_src0;
+ let Inst{49-41} = scale_src1;
+ let Inst{58-50} = 0; // scale src2
+ let Inst{59} = matrix_b_scale{0}; // scale_op_sel_hi(0)
+ let Inst{60} = 0; // scale_op_sel_hi(1)
+ let Inst{63-61} = {0, matrix_a_scale_fmt{1-0}}; // neg (lo)
+
+ // The high half of the encoding is the unscaled wmma op.
+ let Inst{71-64} = vdst;
+
+ let Inst{72} = !if(P.NegHi01, src0_modifiers{1}, 0); // neg_hi src0
+ let Inst{73} = !if(P.NegHi01, src1_modifiers{1}, 0); // neg_hi src1
+ let Inst{74} = !if(P.NegHi2, src2_modifiers{1}, 0); // neg_hi src2
+
+ let Inst{77-75} = !if(P.HasMatrixFMT, matrix_a_fmt{2-0}, 0); // op_sel
+
+ let Inst{78,124,123} = !if(P.HasMatrixFMT, matrix_b_fmt{2-0}, 7); // op_sel_hi
+ let Inst{79} = !if(P.HasClamp, clamp{0}, 0);
+
+ let Inst{87-80} = op;
+  let Inst{95-88} = 0xcc; // Encoding
+ let Inst{104-96} = !if(P.HasSrc0, src0, 0);
+ let Inst{113-105} = !if(P.HasSrc1, src1, 0);
+ let Inst{122-114} = !if(P.HasSrc2, src2, 0);
+
+ // neg_lo
+ let Inst{125} = !if(P.NegLo01, src0_modifiers{0}, 0);
+ let Inst{126} = !if(P.NegLo01, src1_modifiers{0}, 0);
+ let Inst{127} = !if(P.NegLo2, src2_modifiers{0}, 0);
+}
+
+multiclass VOP3PX2_Real_ScaledWMMA<bits<8> op, bits<8> LdScaleOp, VOP3PWMMA_Profile WMMAP> {
+ defvar PS = !cast<VOP3P_Pseudo>(NAME # "_twoaddr");
+ defvar asmName = !substr(PS.Mnemonic, 0, !sub(!size(PS.Mnemonic), !size("_f8_f8_w32")));
+ defvar psName = !substr(NAME, 0, !sub(!size(PS.Mnemonic), !size("_f8_f8_w32")));
+ let SubtargetPredicate = isGFX1250Plus, WaveSizePredicate = isWave32,
+ DecoderNamespace = "GFX1250" in {
+ def _gfx1250 : VOP3P_Real_Gen<PS, GFX1250Gen, asmName>,
+ VOP3PX2e <op, LdScaleOp, WMMAP>,
+ MFMA_F8F6F4_WithSizeTable_Helper<PS, psName # "_f8_f8_w32_gfx1250"> {
+ let AsmString = asmName # PS.AsmOperands;
+ }
+ }
+}
+
+multiclass VOP3PX2_Real_ScaledWMMA_SrcFormats<bits<8> op, bits<8> LdScaleOp, string WMMAP> {
+ defm _f8_f8_w32 : VOP3PX2_Real_ScaledWMMA<op, LdScaleOp, !cast<VOP3PWMMA_Profile>(WMMAP # "_f8_f8_w32")>;
+ foreach I = ["f8_f6", "f8_f4", "f6_f8", "f6_f6", "f6_f4", "f4_f8", "f4_f6", "f4_f4"] in {
+ let isAsmParserOnly = true in { // Disable ambiguous disassembly.
+ defm _#I#_w32 : VOP3PX2_Real_ScaledWMMA<op, LdScaleOp, !cast<VOP3PWMMA_Profile>(WMMAP # "_" # I # "_w32")>;
+ }
+ }
+}
+
defm V_WMMA_F32_16X16X16_F16_w32 : VOP3P_Real_WMMA_gfx12 <0x040, F32_F16_WMMA_w32>;
defm V_WMMA_F32_16X16X16_BF16_w32 : VOP3P_Real_WMMA_gfx12 <0x041, F32_BF16_WMMA_w32>;
defm V_WMMA_F16_16X16X16_F16_w32 : VOP3P_Real_WMMA_gfx12 <0x042, F16_F16_WMMA_w32>;
@@ -2180,6 +2289,8 @@ defm V_WMMA_F16_16X16X128_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x087, F16_FP8B
defm V_WMMA_F32_32X16X128_F4_w32 : VOP3P_Real_WMMA_gfx1250 <0x088, F32_32X16X128_F4_WMMA_w32>;
defm V_WMMA_F32_16X16X128_F8F6F4 : VOP3P_Real_WMMA_gfx1250_SrcFormats<0x033, "F32_16X16X128_F8F6F4">;
+defm V_WMMA_SCALE_F32_16X16X128_F8F6F4 : VOP3PX2_Real_ScaledWMMA_SrcFormats<0x033, 0x35, "F32_16X16X128_F8F6F4_SCALE">;
+defm V_WMMA_SCALE16_F32_16X16X128_F8F6F4 : VOP3PX2_Real_ScaledWMMA_SrcFormats<0x033, 0x3a, "F32_16X16X128_F8F6F4_SCALE16">;
defm V_SWMMAC_F32_16X16X64_F16_w32 : VOP3P_Real_WMMA_gfx1250 <0x065, F32_F16X64_SWMMAC_w32>;
defm V_SWMMAC_F32_16X16X64_BF16_w32 : VOP3P_Real_WMMA_gfx1250 <0x066, F32_BF16X64_SWMMAC_w32>;
@@ -2283,6 +2394,9 @@ defm V_FMA_MIX_F32_BF16 : VOP3P_Realtriple<GFX1250Gen, 0x3d>;
defm V_FMA_MIXLO_BF16 : VOP3P_Realtriple<GFX1250Gen, 0x3e>;
defm V_FMA_MIXHI_BF16 : VOP3P_Realtriple<GFX1250Gen, 0x3f>;
+defm V_WMMA_LD_SCALE_PAIRED_B32 : VOP3P_Real_gfx1250<0x35>;
+defm V_WMMA_LD_SCALE16_PAIRED_B64 : VOP3P_Real_gfx1250<0x3a>;
+
let AssemblerPredicate = isGFX1250Plus in
def : AMDGPUMnemonicAlias<"v_fma_mix_f32_f16", "v_fma_mix_f32">;
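
VOP3PX2e above is a 128-bit encoding: the low 64 bits carry the ld_scale half (its opcode in Inst{23-16}, the paired scale sources in Inst{40-32} and Inst{49-41}), and the high 64 bits carry the ordinary unscaled WMMA encoding. A rough C++ sketch of packing just those fields, assuming everything else is zero; this illustrates the layout, it is not the real emitter:

```cpp
#include <cassert>
#include <cstdint>
#include <utility>

// Pack a simplified VOP3PX2 instruction as two 64-bit halves. Field positions
// follow the Inst{...} assignments in the diff; all other fields are left 0.
static std::pair<uint64_t, uint64_t> encodeVOP3PX2(uint8_t LdScaleOp,
                                                   uint16_t ScaleSrc0,
                                                   uint16_t ScaleSrc1,
                                                   uint8_t WmmaOp,
                                                   uint8_t Vdst) {
  uint64_t Lo = 0, Hi = 0;
  Lo |= uint64_t(0xcc) << 24;              // Inst{31-24}: VOP3P encoding
  Lo |= uint64_t(LdScaleOp) << 16;         // Inst{23-16}: ld_scale opcode
  Lo |= uint64_t(ScaleSrc0 & 0x1ff) << 32; // Inst{40-32}: scale_src0
  Lo |= uint64_t(ScaleSrc1 & 0x1ff) << 41; // Inst{49-41}: scale_src1
  Hi |= uint64_t(Vdst);                    // Inst{71-64}: vdst
  Hi |= uint64_t(WmmaOp) << 16;            // Inst{87-80}: wmma opcode
  Hi |= uint64_t(0xcc) << 24;              // Inst{95-88}: encoding
  return {Lo, Hi};
}

int main() {
  auto [Lo, Hi] = encodeVOP3PX2(0x35, 0x100, 0x101, 0x33, 4);
  assert(((Lo >> 16) & 0xff) == 0x35 && ((Hi >> 16) & 0xff) == 0x33);
  return 0;
}
```
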
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index f027ab0..3cad5a1 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -475,17 +475,24 @@ class VOP3Pe_Base {
bits<1> index_key_32bit;
bits<3> matrix_a_fmt;
bits<3> matrix_b_fmt;
+ bits<1> matrix_a_scale;
+ bits<1> matrix_b_scale;
+ bits<2> matrix_a_scale_fmt;
+ bits<2> matrix_b_scale_fmt;
bits<1> matrix_a_reuse;
bits<1> matrix_b_reuse;
}
class VOP3Pe <VOPProfile P> : Enc64, VOP3Pe_Base {
let Inst{7-0} = !if(P.HasDst, vdst, 0);
- let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
- let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1
+ let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1},
+ !if(P.HasMatrixScale, matrix_b_scale_fmt{0}, 0)); // neg_hi src0
+ let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1},
+ !if(P.HasMatrixScale, matrix_b_scale_fmt{1}, 0)); // neg_hi src1
let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2
- let Inst{11} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{2}, 0); // op_sel(0)
+ let Inst{11} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{2},
+ !if(P.HasMatrixScale, matrix_a_scale{0}, 0)); // op_sel(0)
let Inst{12} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{2}, 0); // op_sel(1)
let Inst{13} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{2},
!if(P.HasMatrixReuse, matrix_a_reuse, 0)); // op_sel(2)
@@ -500,10 +507,17 @@ class VOP3Pe <VOPProfile P> : Enc64, VOP3Pe_Base {
let Inst{40-32} = !if(P.HasSrc0, src0, 0);
let Inst{49-41} = !if(P.HasSrc1, src1, 0);
let Inst{58-50} = !if(P.HasSrc2, src2, 0);
- let Inst{59} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{3}, !if(P.IsDOT, 1, ?)); // op_sel_hi(0)
- let Inst{60} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{3}, !if(P.IsDOT, 1, ?)); // op_sel_hi(1)
- let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // neg (lo)
- let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // neg (lo)
+ let Inst{59} = !cond(!and(P.HasSrc0, P.HasOpSel) : src0_modifiers{3},
+ P.IsDOT : 1,
+ P.HasMatrixScale : matrix_b_scale{0},
+ 1: ?); // op_sel_hi(0)
+ let Inst{60} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{3},
+ !if(P.HasMatrixScale, 0,
+ !if(P.IsDOT, 1, ?))); // op_sel_hi(1)
+ let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0},
+ !if(P.HasMatrixScale, matrix_a_scale_fmt{0}, 0)); // neg (lo)
+ let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0},
+ !if(P.HasMatrixScale, matrix_a_scale_fmt{1}, 0)); // neg (lo)
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo)
}
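
When HasMatrixScale is set, the VOP3Pe changes above reuse otherwise-idle modifier bit positions (neg_hi, op_sel, op_sel_hi, neg_lo) to carry the matrix scale and scale-format fields. A minimal hedged sketch of that bit reuse for the two matrix_b_scale_fmt bits, which the diff places in Inst{8} and Inst{9}; illustrative C++ only, not the TableGen encoder:

```cpp
#include <cassert>
#include <cstdint>

// Store the two matrix_b_scale_fmt bits in the neg_hi positions of a 64-bit
// VOP3P word: Inst{8} takes fmt bit 0 and Inst{9} takes fmt bit 1.
static uint64_t packBScaleFmt(uint64_t Inst, unsigned BScaleFmt) {
  Inst |= uint64_t(BScaleFmt & 1) << 8;        // Inst{8}  <- matrix_b_scale_fmt{0}
  Inst |= uint64_t((BScaleFmt >> 1) & 1) << 9; // Inst{9}  <- matrix_b_scale_fmt{1}
  return Inst;
}

int main() {
  uint64_t Inst = packBScaleFmt(0, 0b10);
  assert(((Inst >> 8) & 0x3) == 0b10); // bits 9:8 now hold the 2-bit format
  return 0;
}
```
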
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 9366256..7f8b446 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -669,13 +669,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
// Integer division functions
// RTABI chapter 4.3.1
- { RTLIB::SDIV_I8, RTLIB::__aeabi_idiv__i8 },
- { RTLIB::SDIV_I16, RTLIB::__aeabi_idiv__i16 },
- { RTLIB::SDIV_I32, RTLIB::__aeabi_idiv__i32},
+ { RTLIB::SDIV_I32, RTLIB::__aeabi_idiv },
{ RTLIB::SDIV_I64, RTLIB::__aeabi_ldivmod },
- { RTLIB::UDIV_I8, RTLIB::__aeabi_uidiv__i8 },
- { RTLIB::UDIV_I16, RTLIB::__aeabi_uidiv__i16 },
- { RTLIB::UDIV_I32, RTLIB::__aeabi_uidiv__i32 },
+ { RTLIB::UDIV_I32, RTLIB::__aeabi_uidiv },
{ RTLIB::UDIV_I64, RTLIB::__aeabi_uldivmod },
};
// clang-format on
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index e8d0d35..fedf9e2 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -121,10 +121,10 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
return std::make_unique<ARMElfTargetObjectFile>();
}
-static std::string computeDataLayout(const Triple &TT, StringRef CPU,
+static std::string computeDataLayout(const Triple &TT,
const TargetOptions &Options,
bool isLittle) {
- auto ABI = ARM::computeTargetABI(TT, CPU, Options.MCOptions.ABIName);
+ auto ABI = ARM::computeTargetABI(TT, Options.MCOptions.ABIName);
std::string Ret;
if (isLittle)
@@ -202,11 +202,10 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
CodeGenOptLevel OL, bool isLittle)
- : CodeGenTargetMachineImpl(T, computeDataLayout(TT, CPU, Options, isLittle),
- TT, CPU, FS, Options,
- getEffectiveRelocModel(TT, RM),
+ : CodeGenTargetMachineImpl(T, computeDataLayout(TT, Options, isLittle), TT,
+ CPU, FS, Options, getEffectiveRelocModel(TT, RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
- TargetABI(ARM::computeTargetABI(TT, CPU, Options.MCOptions.ABIName)),
+ TargetABI(ARM::computeTargetABI(TT, Options.MCOptions.ABIName)),
TLOF(createTLOF(getTargetTriple())), isLittle(isLittle) {
// Default to triple-appropriate float ABI
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index dfa3de3c..cc1c79b 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -296,9 +296,9 @@ static bool needsInterworking(const MCAssembler &Asm, const MCSymbol *Sym,
unsigned FixupKind) {
// Create relocations for unconditional branches to function symbols with
// different execution mode in ELF binaries.
- if (!Sym || !Sym->isELF())
+ if (!Sym || !Asm.getContext().isELF())
return false;
- unsigned Type = cast<MCSymbolELF>(Sym)->getType();
+ unsigned Type = static_cast<const MCSymbolELF *>(Sym)->getType();
if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)) {
if (Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_uncondbranch))
return true;
@@ -1108,9 +1108,8 @@ std::optional<bool> ARMAsmBackend::evaluateFixup(const MCFragment &F,
}
void ARMAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
if (IsResolved && shouldForceRelocation(Fixup, Target))
IsResolved = false;
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
@@ -1124,14 +1123,15 @@ void ARMAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
return; // Doesn't change encoding.
const unsigned NumBytes = getFixupKindNumBytes(Kind);
- unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// Used to point to big endian bytes.
unsigned FullSizeBytes;
if (Endian == llvm::endianness::big) {
FullSizeBytes = getFixupKindContainerSizeBytes(Kind);
- assert((Offset + FullSizeBytes) <= Data.size() && "Invalid fixup size!");
+ assert(Fixup.getOffset() + FullSizeBytes <= F.getSize() &&
+ "Invalid fixup size!");
assert(NumBytes <= FullSizeBytes && "Invalid fixup size!");
}
@@ -1141,7 +1141,7 @@ void ARMAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
for (unsigned i = 0; i != NumBytes; ++i) {
unsigned Idx =
Endian == llvm::endianness::little ? i : (FullSizeBytes - 1 - i);
- Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
+ Data[Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
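
The same mechanical change repeats across the asm backends that follow: applyFixup now receives a raw uint8_t *Data that already points at the fixup's first byte inside the fragment, so the per-byte writes drop the Fixup.getOffset() term and index from zero. A minimal standalone sketch of that write pattern, with a made-up helper name and test values rather than the LLVM API:

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>

// Hypothetical helper mirroring the post-change convention: Data already
// addresses the fixup's first byte, so the loop indexes from zero instead of
// from Fixup.getOffset().
static void orInFixupLE(uint8_t *Data, size_t NumBytes, uint64_t Value) {
  for (size_t I = 0; I != NumBytes; ++I)
    Data[I] |= uint8_t((Value >> (I * 8)) & 0xff);
}

int main() {
  uint8_t Inst[4] = {0x00, 0x00, 0x00, 0xe0}; // placeholder encoding bytes
  orInFixupLE(Inst, 3, 0x123456);             // patch a 24-bit immediate
  assert(Inst[0] == 0x56 && Inst[1] == 0x34 && Inst[2] == 0x12);
  return 0;
}
```

Since the pointer no longer encodes the offset, each backend keeps its bounds check as an assert against the fragment, of the form Fixup.getOffset() + NumBytes <= F.getSize().
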
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 07d2cf7..2844232 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -40,8 +40,7 @@ public:
std::optional<bool> evaluateFixup(const MCFragment &, MCFixup &, MCValue &,
uint64_t &) override;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
unsigned getRelaxedOpcode(unsigned Op, const MCSubtargetInfo &STI) const;
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 50e9ca1..d914f6e 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -97,8 +97,8 @@ unsigned ARMELFObjectWriter::getRelocType(const MCFixup &Fixup,
case ARM::S_TLSLDM_FDPIC:
case ARM::S_TLSLDO:
case ARM::S_TPOFF:
- if (auto *SA = Target.getAddSym())
- cast<MCSymbolELF>(SA)->setType(ELF::STT_TLS);
+ if (auto *SA = const_cast<MCSymbol *>(Target.getAddSym()))
+ static_cast<MCSymbolELF *>(SA)->setType(ELF::STT_TLS);
break;
default:
break;
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 6dfe846..0796746 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -614,7 +614,7 @@ public:
if (!IsThumb)
return Val;
- unsigned Type = cast<MCSymbolELF>(Symbol)->getType();
+ unsigned Type = static_cast<MCSymbolELF *>(Symbol)->getType();
if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC) &&
Symbol->isDefined())
getAssembler().setIsThumbFunc(Symbol);
@@ -679,7 +679,8 @@ private:
}
void EmitMappingSymbol(StringRef Name) {
- auto *Symbol = cast<MCSymbolELF>(getContext().createLocalSymbol(Name));
+ auto *Symbol =
+ static_cast<MCSymbolELF *>(getContext().createLocalSymbol(Name));
emitLabel(Symbol);
Symbol->setType(ELF::STT_NOTYPE);
@@ -687,7 +688,8 @@ private:
}
void emitMappingSymbol(StringRef Name, MCFragment &F, uint64_t Offset) {
- auto *Symbol = cast<MCSymbolELF>(getContext().createLocalSymbol(Name));
+ auto *Symbol =
+ static_cast<MCSymbolELF *>(getContext().createLocalSymbol(Name));
emitLabelAtPos(Symbol, SMLoc(), F, Offset);
Symbol->setType(ELF::STT_NOTYPE);
Symbol->setBinding(ELF::STB_LOCAL);
@@ -1088,7 +1090,7 @@ void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) {
return;
Streamer.getAssembler().registerSymbol(*Symbol);
- unsigned Type = cast<MCSymbolELF>(Symbol)->getType();
+ unsigned Type = static_cast<MCSymbolELF *>(Symbol)->getType();
if (Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)
emitThumbFunc(Symbol);
}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 354de8f..8ee3a2d 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -505,7 +505,7 @@ public:
// Remember that the function is a thumb function. Fixup and relocation
// values will need adjusted.
getStreamer().getAssembler().setIsThumbFunc(Symbol);
- cast<MCSymbolMachO>(Symbol)->setThumbFunc();
+ static_cast<MCSymbolMachO *>(Symbol)->setThumbFunc();
}
};
} // namespace
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index 38444f9..05a7d03 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -368,9 +368,8 @@ AVRAsmBackend::createObjectTargetWriter() const {
}
void AVRAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
// AVR sets the fixup value to bypass the assembly time overflow with a
// relocation.
if (IsResolved) {
@@ -397,14 +396,14 @@ void AVRAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// Shift the value into position.
Value <<= Info.TargetOffset;
- unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
for (unsigned i = 0; i < NumBytes; ++i) {
uint8_t mask = (((Value >> (i * 8)) & 0xff));
- Data[Offset + i] |= mask;
+ Data[i] |= mask;
}
}
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
index 68c839e..9633669 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
@@ -38,8 +38,7 @@ public:
createObjectTargetWriter() const override;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index dda8753..53933f9 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -27,8 +27,7 @@ public:
~BPFAsmBackend() override = default;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override;
@@ -66,35 +65,32 @@ bool BPFAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
}
void BPFAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
if (Fixup.getKind() == FK_SecRel_8) {
// The Value is 0 for global variables, and the in-section offset
// for static variables. Write to the immediate field of the inst.
assert(Value <= UINT32_MAX);
- support::endian::write<uint32_t>(&Data[Fixup.getOffset() + 4],
- static_cast<uint32_t>(Value),
+ support::endian::write<uint32_t>(Data + 4, static_cast<uint32_t>(Value),
Endian);
} else if (Fixup.getKind() == FK_Data_4 && !Fixup.isPCRel()) {
- support::endian::write<uint32_t>(&Data[Fixup.getOffset()], Value, Endian);
+ support::endian::write<uint32_t>(Data, Value, Endian);
} else if (Fixup.getKind() == FK_Data_8) {
- support::endian::write<uint64_t>(&Data[Fixup.getOffset()], Value, Endian);
+ support::endian::write<uint64_t>(Data, Value, Endian);
} else if (Fixup.getKind() == FK_Data_4 && Fixup.isPCRel()) {
Value = (uint32_t)((Value - 8) / 8);
if (Endian == llvm::endianness::little) {
- Data[Fixup.getOffset() + 1] = 0x10;
- support::endian::write32le(&Data[Fixup.getOffset() + 4], Value);
+ Data[1] = 0x10;
+ support::endian::write32le(Data + 4, Value);
} else {
- Data[Fixup.getOffset() + 1] = 0x1;
- support::endian::write32be(&Data[Fixup.getOffset() + 4], Value);
+ Data[1] = 0x1;
+ support::endian::write32be(Data + 4, Value);
}
} else if (Fixup.getKind() == BPF::FK_BPF_PCRel_4) {
// The input Value represents the number of bytes.
Value = (uint32_t)((Value - 8) / 8);
- support::endian::write<uint32_t>(&Data[Fixup.getOffset() + 4], Value,
- Endian);
+ support::endian::write<uint32_t>(Data + 4, Value, Endian);
} else {
assert(Fixup.getKind() == FK_Data_2 && Fixup.isPCRel());
@@ -103,8 +99,7 @@ void BPFAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
report_fatal_error("Branch target out of insn range");
Value = (uint16_t)((Value - 8) / 8);
- support::endian::write<uint16_t>(&Data[Fixup.getOffset() + 2], Value,
- Endian);
+ support::endian::write<uint16_t>(Data + 2, Value, Endian);
}
}
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
index 1bd82fad..6964998 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
@@ -197,9 +197,8 @@ std::optional<bool> CSKYAsmBackend::evaluateFixup(const MCFragment &F,
}
void CSKYAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
if (IsResolved && shouldForceRelocation(Fixup, Target))
IsResolved = false;
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
@@ -217,10 +216,10 @@ void CSKYAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// Shift the value into position.
Value <<= Info.TargetOffset;
- unsigned Offset = Fixup.getOffset();
unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8;
- assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
@@ -228,14 +227,14 @@ void CSKYAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
bool IsInstFixup = (Kind >= FirstTargetFixupKind);
if (IsLittleEndian && IsInstFixup && (NumBytes == 4)) {
- Data[Offset + 0] |= uint8_t((Value >> 16) & 0xff);
- Data[Offset + 1] |= uint8_t((Value >> 24) & 0xff);
- Data[Offset + 2] |= uint8_t(Value & 0xff);
- Data[Offset + 3] |= uint8_t((Value >> 8) & 0xff);
+ Data[0] |= uint8_t((Value >> 16) & 0xff);
+ Data[1] |= uint8_t((Value >> 24) & 0xff);
+ Data[2] |= uint8_t(Value & 0xff);
+ Data[3] |= uint8_t((Value >> 8) & 0xff);
} else {
for (unsigned I = 0; I != NumBytes; I++) {
unsigned Idx = IsLittleEndian ? I : (NumBytes - 1 - I);
- Data[Offset + Idx] |= uint8_t((Value >> (I * 8)) & 0xff);
+ Data[Idx] |= uint8_t((Value >> (I * 8)) & 0xff);
}
}
}
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
index 1c8516f..5d8826a 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
@@ -25,8 +25,7 @@ public:
std::optional<bool> evaluateFixup(const MCFragment &, MCFixup &, MCValue &,
uint64_t &) override;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp
index d042d26..4667975f 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp
@@ -48,8 +48,8 @@ unsigned CSKYELFObjectWriter::getRelocType(const MCFixup &Fixup,
case CSKY::S_TLSGD:
case CSKY::S_TLSLDM:
case CSKY::S_TLSLDO:
- if (auto *SA = Target.getAddSym())
- cast<MCSymbolELF>(SA)->setType(ELF::STT_TLS);
+ if (auto *SA = const_cast<MCSymbol *>(Target.getAddSym()))
+ static_cast<MCSymbolELF *>(SA)->setType(ELF::STT_TLS);
break;
default:
break;
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.cpp
index 346b123..397cf16 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.cpp
@@ -169,7 +169,8 @@ void CSKYELFStreamer::EmitMappingSymbol(StringRef Name) {
State = (Name == "$t" ? EMS_Text : EMS_Data);
- auto *Symbol = cast<MCSymbolELF>(getContext().createLocalSymbol(Name));
+ auto *Symbol =
+ static_cast<MCSymbolELF *>(getContext().createLocalSymbol(Name));
emitLabel(Symbol);
Symbol->setType(ELF::STT_NOTYPE);
diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
index b6e8ce7..26a113d 100644
--- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
+++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
@@ -103,7 +103,7 @@ GlobalVariable *DXContainerGlobals::computeShaderHash(Module &M) {
dxbc::ShaderHash HashData = {0, {0}};
// The Hash's IncludesSource flag gets set whenever the hashed shader includes
// debug information.
- if (M.debug_compile_units_begin() != M.debug_compile_units_end())
+ if (!M.debug_compile_units().empty())
HashData.Flags = static_cast<uint32_t>(dxbc::HashFlags::IncludesSource);
memcpy(reinterpret_cast<void *>(&HashData.Digest), Result.data(), 16);
diff --git a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp
index 5323be6..9a14c01 100644
--- a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp
+++ b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp
@@ -78,8 +78,7 @@ public:
~DXILAsmBackend() override = default;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override {}
+ uint8_t *Data, uint64_t Value, bool IsResolved) override {}
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
diff --git a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
index 102f1c6..14b6bb3 100644
--- a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -330,7 +330,7 @@ bool HexagonCommonGEP::isHandledGepForm(GetElementPtrInst *GepI) {
if (!GepI->getType()->isPointerTy())
return false;
// No GEPs without any indices. (Is this possible?)
- if (GepI->idx_begin() == GepI->idx_end())
+ if (GepI->indices().empty())
return false;
return true;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 52fa678..613048b 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -1987,7 +1987,7 @@ SmallVector<uint32_t, 8> HvxSelector::getPerfectCompletions(ShuffleMask SM,
// times). In such cases it will be impossible to complete this to a
// perfect shuffle.
SmallVector<uint32_t, 8> Sorted(Worklist);
- llvm::sort(Sorted.begin(), Sorted.end());
+ llvm::sort(Sorted);
for (unsigned I = 0, E = Sorted.size(); I != E;) {
unsigned P = Sorted[I], Count = 1;
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index d5b7a75..1a0f1ab 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -402,8 +402,7 @@ public:
}
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &,
- MutableArrayRef<char> Data, uint64_t FixupValue,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t FixupValue, bool IsResolved) override;
bool isInstRelaxable(MCInst const &HMI) const {
const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(*MCII, HMI);
@@ -649,8 +648,7 @@ public:
} // namespace
void HexagonAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data,
+ const MCValue &Target, uint8_t *InstAddr,
uint64_t FixupValue, bool IsResolved) {
if (IsResolved && shouldForceRelocation(Fixup))
IsResolved = false;
@@ -667,10 +665,9 @@ void HexagonAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// LLVM gives us an encoded value, we have to convert it back
// to a real offset before we can use it.
- uint32_t Offset = Fixup.getOffset();
unsigned NumBytes = getFixupKindNumBytes(Kind);
- assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!");
- char *InstAddr = Data.data() + Offset;
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
Value = adjustFixupValue(Kind, FixupValue);
if (!Value)
@@ -757,8 +754,8 @@ void HexagonAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
uint32_t OldData = 0; for (unsigned i = 0; i < NumBytes; i++) OldData |=
(InstAddr[i] << (i * 8)) & (0xff << (i * 8));
dbgs() << "\tBValue=0x"; dbgs().write_hex(Value) << ": AValue=0x";
- dbgs().write_hex(FixupValue)
- << ": Offset=" << Offset << ": Size=" << Data.size() << ": OInst=0x";
+ dbgs().write_hex(FixupValue) << ": Offset=" << Fixup.getOffset()
+ << ": Size=" << F.getSize() << ": OInst=0x";
dbgs().write_hex(OldData) << ": Reloc=0x"; dbgs().write_hex(Reloc););
// For each byte of the fragment that the fixup touches, mask in the
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
index 9752f3a..af97ea2 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -50,8 +50,8 @@ unsigned HexagonELFObjectWriter::getRelocType(const MCFixup &Fixup,
case HexagonMCExpr::VK_IE:
case HexagonMCExpr::VK_IE_GOT:
case HexagonMCExpr::VK_TPREL:
- if (auto *SA = Target.getAddSym())
- cast<MCSymbolELF>(SA)->setType(ELF::STT_TLS);
+ if (auto *SA = const_cast<MCSymbol *>(Target.getAddSym()))
+ static_cast<MCSymbolELF *>(SA)->setType(ELF::STT_TLS);
break;
default:
break;
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index 13ecc23..039ef4f 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -96,7 +96,7 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol,
getAssembler().registerSymbol(*Symbol);
StringRef sbss[4] = {".sbss.1", ".sbss.2", ".sbss.4", ".sbss.8"};
- auto ELFSymbol = cast<MCSymbolELF>(Symbol);
+ auto ELFSymbol = static_cast<MCSymbolELF *>(Symbol);
if (!ELFSymbol->isBindingSet())
ELFSymbol->setBinding(ELF::STB_GLOBAL);
@@ -143,7 +143,7 @@ void HexagonMCELFStreamer::HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol,
Align ByteAlignment,
unsigned AccessSize) {
getAssembler().registerSymbol(*Symbol);
- auto ELFSymbol = cast<MCSymbolELF>(Symbol);
+ auto ELFSymbol = static_cast<const MCSymbolELF *>(Symbol);
ELFSymbol->setBinding(ELF::STB_LOCAL);
ELFSymbol->setExternal(false);
HexagonMCEmitCommonSymbol(Symbol, Size, ByteAlignment, AccessSize);
diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
index 83d1697..3112dea 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
@@ -48,8 +48,7 @@ public:
: MCAsmBackend(llvm::endianness::big), OSType(OST) {}
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override;
@@ -72,9 +71,8 @@ bool LanaiAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
}
void LanaiAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
if (!IsResolved)
Asm->getWriter().recordRelocation(F, Fixup, Target, Value);
@@ -85,7 +83,6 @@ void LanaiAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// The number of bytes in the object that need fixing up.
- unsigned Offset = Fixup.getOffset();
unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8;
unsigned FullSize = 4;
@@ -95,8 +92,7 @@ void LanaiAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// Load instruction and apply value
for (unsigned i = 0; i != NumBytes; ++i) {
unsigned Idx = (FullSize - 1 - i);
- CurVal |= static_cast<uint64_t>(static_cast<uint8_t>(Data[Offset + Idx]))
- << (i * 8);
+ CurVal |= static_cast<uint64_t>(static_cast<uint8_t>(Data[Idx])) << (i * 8);
}
uint64_t Mask =
@@ -106,7 +102,7 @@ void LanaiAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// Write out the fixed up bytes back to the code/data bits.
for (unsigned i = 0; i != NumBytes; ++i) {
unsigned Idx = (FullSize - 1 - i);
- Data[Offset + Idx] = static_cast<uint8_t>((CurVal >> (i * 8)) & 0xff);
+ Data[Idx] = static_cast<uint8_t>((CurVal >> (i * 8)) & 0xff);
}
}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index 858f3d0..fda9d97 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -131,19 +131,18 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
}
-static void fixupLeb128(MCContext &Ctx, const MCFixup &Fixup,
- MutableArrayRef<char> Data, uint64_t Value) {
+static void fixupLeb128(MCContext &Ctx, const MCFixup &Fixup, uint8_t *Data,
+ uint64_t Value) {
unsigned I;
- for (I = 0; I != Data.size() && Value; ++I, Value >>= 7)
+ for (I = 0; Value; ++I, Value >>= 7)
Data[I] |= uint8_t(Value & 0x7f);
if (Value)
Ctx.reportError(Fixup.getLoc(), "Invalid uleb128 value!");
}
void LoongArchAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
if (IsResolved && shouldForceRelocation(Fixup, Target))
IsResolved = false;
IsResolved = addReloc(F, Fixup, Target, Value, IsResolved);
@@ -166,14 +165,14 @@ void LoongArchAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// Shift the value into position.
Value <<= Info.TargetOffset;
- unsigned Offset = Fixup.getOffset();
unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8;
- assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
for (unsigned I = 0; I != NumBytes; ++I) {
- Data[Offset + I] |= uint8_t((Value >> (I * 8)) & 0xff);
+ Data[I] |= uint8_t((Value >> (I * 8)) & 0xff);
}
}
@@ -274,15 +273,14 @@ bool LoongArchAsmBackend::relaxDwarfLineAddr(MCFragment &F,
int64_t LineDelta = F.getDwarfLineDelta();
const MCExpr &AddrDelta = F.getDwarfAddrDelta();
- SmallVector<MCFixup, 1> Fixups;
size_t OldSize = F.getVarSize();
int64_t Value;
if (AddrDelta.evaluateAsAbsolute(Value, *Asm))
return false;
- bool IsAbsolute = AddrDelta.evaluateKnownAbsolute(Value, *Asm);
- assert(IsAbsolute && "CFA with invalid expression");
- (void)IsAbsolute;
+ [[maybe_unused]] bool IsAbsolute =
+ AddrDelta.evaluateKnownAbsolute(Value, *Asm);
+ assert(IsAbsolute);
SmallVector<char> Data;
raw_svector_ostream OS(Data);
@@ -293,33 +291,23 @@ bool LoongArchAsmBackend::relaxDwarfLineAddr(MCFragment &F,
encodeSLEB128(LineDelta, OS);
}
- unsigned Offset;
- std::pair<MCFixupKind, MCFixupKind> FK;
-
// According to the DWARF specification, the `DW_LNS_fixed_advance_pc` opcode
// takes a single unsigned half (unencoded) operand. The maximum encodable
// value is therefore 65535. Set a conservative upper bound for relaxation.
+ unsigned PCBytes;
if (Value > 60000) {
unsigned PtrSize = C.getAsmInfo()->getCodePointerSize();
-
- OS << uint8_t(dwarf::DW_LNS_extended_op);
- encodeULEB128(PtrSize + 1, OS);
-
- OS << uint8_t(dwarf::DW_LNE_set_address);
- Offset = OS.tell();
assert((PtrSize == 4 || PtrSize == 8) && "Unexpected pointer size");
- FK = getRelocPairForSize(PtrSize == 4 ? 32 : 64);
+ PCBytes = PtrSize;
+ OS << uint8_t(dwarf::DW_LNS_extended_op) << uint8_t(PtrSize + 1)
+ << uint8_t(dwarf::DW_LNE_set_address);
OS.write_zeros(PtrSize);
} else {
+ PCBytes = 2;
OS << uint8_t(dwarf::DW_LNS_fixed_advance_pc);
- Offset = OS.tell();
- FK = getRelocPairForSize(16);
support::endian::write<uint16_t>(OS, 0, llvm::endianness::little);
}
-
- const MCBinaryExpr &MBE = cast<MCBinaryExpr>(AddrDelta);
- Fixups.push_back(MCFixup::create(Offset, MBE.getLHS(), std::get<0>(FK)));
- Fixups.push_back(MCFixup::create(Offset, MBE.getRHS(), std::get<1>(FK)));
+ auto Offset = OS.tell() - PCBytes;
if (LineDelta == INT64_MAX) {
OS << uint8_t(dwarf::DW_LNS_extended_op);
@@ -330,7 +318,8 @@ bool LoongArchAsmBackend::relaxDwarfLineAddr(MCFragment &F,
}
F.setVarContents(Data);
- F.setVarFixups(Fixups);
+ F.setVarFixups({MCFixup::create(Offset, &AddrDelta,
+ MCFixup::getDataKindForSize(PCBytes))});
WasRelaxed = OldSize != Data.size();
return true;
}
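
The rewritten relaxDwarfLineAddr above drops the paired ADD/SUB relocations and getRelocPairForSize in favour of a single data fixup whose offset is just the bytes emitted so far minus the width of the PC operand. The following standalone sketch mirrors that encoding decision; the struct, function name, and driver are made up, and the DWARF opcode values are written inline rather than taken from the llvm::dwarf enums.

#include <cstdint>
#include <cstdio>
#include <vector>

// DWARF line-program opcodes used below (values from the DWARF spec).
constexpr uint8_t DW_LNS_extended_op = 0x00;
constexpr uint8_t DW_LNS_fixed_advance_pc = 0x09;
constexpr uint8_t DW_LNE_set_address = 0x02;

struct PCAdvance {
  std::vector<uint8_t> Bytes; // encoded opcode plus zero-filled PC operand
  unsigned FixupOffset;       // where the single data fixup is attached
  unsigned PCBytes;           // operand width the fixup must patch
};

// Mirrors the shape of the relaxed code path: a 2-byte fixed_advance_pc when
// the delta is comfortably below 65535, otherwise a full set_address.
static PCAdvance encodePCAdvance(int64_t AddrDelta, unsigned PtrSize) {
  PCAdvance P;
  if (AddrDelta > 60000) {
    P.PCBytes = PtrSize;
    P.Bytes = {DW_LNS_extended_op, uint8_t(PtrSize + 1), DW_LNE_set_address};
    P.Bytes.insert(P.Bytes.end(), PtrSize, uint8_t(0)); // filled by the fixup
  } else {
    P.PCBytes = 2;
    P.Bytes = {DW_LNS_fixed_advance_pc, 0, 0}; // uhalf operand, little endian
  }
  P.FixupOffset = unsigned(P.Bytes.size()) - P.PCBytes;
  return P;
}

int main() {
  PCAdvance Small = encodePCAdvance(100, 8);
  PCAdvance Large = encodePCAdvance(70000, 8);
  std::printf("small: %zu bytes, fixup at %u; large: %zu bytes, fixup at %u\n",
              Small.Bytes.size(), Small.FixupOffset, Large.Bytes.size(),
              Large.FixupOffset);
  return 0;
}
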
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
index 3d929fc..1f13601 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
@@ -42,8 +42,7 @@ public:
uint64_t &FixedValue, bool IsResolved);
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
bool shouldForceRelocation(const MCFixup &Fixup, const MCValue &Target);
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
index fb741af..7e021e4 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
@@ -61,8 +61,8 @@ unsigned LoongArchELFObjectWriter::getRelocType(const MCFixup &Fixup,
case ELF::R_LARCH_TLS_LD_PCREL20_S2:
case ELF::R_LARCH_TLS_GD_PCREL20_S2:
case ELF::R_LARCH_TLS_DESC_PCREL20_S2:
- if (auto *SA = Target.getAddSym())
- cast<MCSymbolELF>(SA)->setType(ELF::STT_TLS);
+ if (auto *SA = const_cast<MCSymbol *>(Target.getAddSym()))
+ static_cast<MCSymbolELF *>(SA)->setType(ELF::STT_TLS);
break;
default:
break;
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
index 7ef705d..fe83dc6 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
@@ -53,8 +53,7 @@ public:
.Default(false)) {}
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
bool mayNeedRelaxation(unsigned Opcode, ArrayRef<MCOperand> Operands,
const MCSubtargetInfo &STI) const override;
@@ -78,9 +77,8 @@ public:
} // end anonymous namespace
void M68kAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
if (!IsResolved)
Asm->getWriter().recordRelocation(F, Fixup, Target, Value);
@@ -95,8 +93,7 @@ void M68kAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// Write in Big Endian
for (unsigned i = 0; i != Size; ++i)
- Data[Fixup.getOffset() + i] =
- uint8_t(static_cast<int64_t>(Value) >> ((Size - i - 1) * 8));
+ Data[i] = uint8_t(static_cast<int64_t>(Value) >> ((Size - i - 1) * 8));
}
/// cc—Carry clear GE—Greater than or equal
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp
index ca94a47..d070409 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp
@@ -70,8 +70,8 @@ unsigned M68kELFObjectWriter::getRelocType(const MCFixup &Fixup,
case M68k::S_TLSLD:
case M68k::S_TLSLDM:
case M68k::S_TPOFF:
- if (auto *SA = Target.getAddSym())
- cast<MCSymbolELF>(SA)->setType(ELF::STT_TLS);
+ if (auto *SA = const_cast<MCSymbol *>(Target.getAddSym()))
+ static_cast<MCSymbolELF *>(SA)->setType(ELF::STT_TLS);
break;
default:
break;
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
index b513503..d892b3a 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
@@ -36,8 +36,7 @@ public:
~MSP430AsmBackend() override = default;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
@@ -105,9 +104,8 @@ uint64_t MSP430AsmBackend::adjustFixupValue(const MCFixup &Fixup,
}
void MSP430AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
Value = adjustFixupValue(Fixup, Value, getContext());
MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
@@ -117,15 +115,14 @@ void MSP430AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// Shift the value into position.
Value <<= Info.TargetOffset;
- unsigned Offset = Fixup.getOffset();
unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8;
-
- assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
for (unsigned i = 0; i != NumBytes; ++i) {
- Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ Data[i] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 259b71b..7b2ee83 100644
--- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -2948,8 +2948,9 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
bool IsPtr64 = ABI.ArePtrs64bit();
bool IsLocalSym =
Res.getAddSym()->isInSection() || Res.getAddSym()->isTemporary() ||
- (Res.getAddSym()->isELF() &&
- cast<MCSymbolELF>(Res.getAddSym())->getBinding() == ELF::STB_LOCAL);
+ (getContext().isELF() &&
+ static_cast<const MCSymbolELF *>(Res.getAddSym())->getBinding() ==
+ ELF::STB_LOCAL);
// For O32, "$"-prefixed symbols are recognized as temporary while
// .L-prefixed symbols are not (PrivateGlobalPrefix is "$"). Recognize ".L"
// manually.
@@ -6653,7 +6654,7 @@ bool MipsAsmParser::searchSymbolAlias(OperandVector &Operands) {
llvm_unreachable("Should never fail");
}
}
- } else if (Sym->isUnset()) {
+ } else if (Sym->isUndefined()) {
// If symbol is unset, it might be created in the `parseSetAssignment`
// routine as an alias for a numeric register name.
// Lookup in the aliases list.
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index c2169be..33aab71 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -283,9 +283,8 @@ static bool shouldForceRelocation(const MCFixup &Fixup) {
/// data fragment, at the offset specified by the fixup and following the
/// fixup kind as appropriate.
void MipsAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
if (shouldForceRelocation(Fixup))
IsResolved = false;
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
@@ -297,7 +296,6 @@ void MipsAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
return; // Doesn't change encoding.
// Where do we start in the object
- unsigned Offset = Fixup.getOffset();
// Number of bytes we need to fixup
unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8;
// Used to point to big endian bytes
@@ -328,7 +326,7 @@ void MipsAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
unsigned Idx = Endian == llvm::endianness::little
? (microMipsLEByteOrder ? calculateMMLEIndex(i) : i)
: (FullSize - 1 - i);
- CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i*8);
+ CurVal |= (uint64_t)((uint8_t)Data[Idx]) << (i * 8);
}
uint64_t Mask = ((uint64_t)(-1) >>
@@ -340,7 +338,7 @@ void MipsAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
unsigned Idx = Endian == llvm::endianness::little
? (microMipsLEByteOrder ? calculateMMLEIndex(i) : i)
: (FullSize - 1 - i);
- Data[Offset + Idx] = (uint8_t)((CurVal >> (i*8)) & 0xff);
+ Data[Idx] = (uint8_t)((CurVal >> (i * 8)) & 0xff);
}
}
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index 816626d..40b5853 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -40,8 +40,7 @@ public:
createObjectTargetWriter() const override;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 7abe9c9..16247bd 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -166,8 +166,8 @@ unsigned MipsELFObjectWriter::getRelocType(const MCFixup &Fixup,
case Mips::S_GOTTPREL:
case Mips::S_TPREL_HI:
case Mips::S_TPREL_LO:
- if (auto *SA = Target.getAddSym())
- cast<MCSymbolELF>(SA)->setType(ELF::STT_TLS);
+ if (auto *SA = const_cast<MCSymbol *>(Target.getAddSym()))
+ static_cast<MCSymbolELF *>(SA)->setType(ELF::STT_TLS);
break;
default:
break;
@@ -450,6 +450,7 @@ bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCValue &V,
needsRelocateWithSymbol(V, (Type >> 8) & 0xff) ||
needsRelocateWithSymbol(V, (Type >> 16) & 0xff);
+ auto *Sym = static_cast<const MCSymbolELF *>(V.getAddSym());
switch (Type) {
default:
errs() << Type << "\n";
@@ -481,7 +482,7 @@ bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCValue &V,
// FIXME: It should be safe to return false for the STO_MIPS_MICROMIPS but
// we neglect to handle the adjustment to the LSB of the addend that
// it causes in applyFixup() and similar.
- if (cast<MCSymbolELF>(V.getAddSym())->getOther() & ELF::STO_MIPS_MICROMIPS)
+ if (Sym->getOther() & ELF::STO_MIPS_MICROMIPS)
return true;
return false;
@@ -492,7 +493,7 @@ bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCValue &V,
case ELF::R_MIPS_16:
case ELF::R_MIPS_32:
case ELF::R_MIPS_GPREL32:
- if (cast<MCSymbolELF>(V.getAddSym())->getOther() & ELF::STO_MIPS_MICROMIPS)
+ if (Sym->getOther() & ELF::STO_MIPS_MICROMIPS)
return true;
[[fallthrough]];
case ELF::R_MIPS_26:
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index e8b9746..feeadc5e 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -76,7 +76,7 @@ void MipsELFStreamer::createPendingLabelRelocs() {
// FIXME: Also mark labels when in MIPS16 mode.
if (ELFTargetStreamer->isMicroMipsEnabled()) {
for (auto *L : Labels) {
- auto *Label = cast<MCSymbolELF>(L);
+ auto *Label = static_cast<MCSymbolELF *>(L);
getAssembler().registerSymbol(*Label);
Label->setOther(ELF::STO_MIPS_MICROMIPS);
}
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index d9680c7..5df70c4 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -931,7 +931,7 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S,
}
void MipsTargetELFStreamer::emitLabel(MCSymbol *S) {
- auto *Symbol = cast<MCSymbolELF>(S);
+ auto *Symbol = static_cast<MCSymbolELF *>(S);
getStreamer().getAssembler().registerSymbol(*Symbol);
uint8_t Type = Symbol->getType();
if (Type != ELF::STT_FUNC)
@@ -1015,11 +1015,11 @@ void MipsTargetELFStreamer::finish() {
}
void MipsTargetELFStreamer::emitAssignment(MCSymbol *S, const MCExpr *Value) {
- auto *Symbol = cast<MCSymbolELF>(S);
+ auto *Symbol = static_cast<MCSymbolELF *>(S);
// If on rhs is micromips symbol then mark Symbol as microMips.
if (Value->getKind() != MCExpr::SymbolRef)
return;
- const auto &RhsSym = cast<MCSymbolELF>(
+ auto &RhsSym = static_cast<const MCSymbolELF &>(
static_cast<const MCSymbolRefExpr *>(Value)->getSymbol());
if (!(RhsSym.getOther() & ELF::STO_MIPS_MICROMIPS))
@@ -1034,12 +1034,14 @@ MCELFStreamer &MipsTargetELFStreamer::getStreamer() {
void MipsTargetELFStreamer::emitGPRel32Value(const MCExpr *Value) {
auto &S = getStreamer();
+ S.ensureHeadroom(4);
S.addFixup(Value, Mips::fixup_Mips_GPREL32);
S.appendContents(4, 0);
}
void MipsTargetELFStreamer::emitGPRel64Value(const MCExpr *Value) {
auto &S = getStreamer();
+ S.ensureHeadroom(8);
// fixup_Mips_GPREL32 designates R_MIPS_GPREL32+R_MIPS_64 on MIPS64.
S.addFixup(Value, Mips::fixup_Mips_GPREL32);
S.appendContents(8, 0);
@@ -1047,24 +1049,28 @@ void MipsTargetELFStreamer::emitGPRel64Value(const MCExpr *Value) {
void MipsTargetELFStreamer::emitDTPRel32Value(const MCExpr *Value) {
auto &S = getStreamer();
+ S.ensureHeadroom(4);
S.addFixup(Value, Mips::fixup_Mips_DTPREL32);
S.appendContents(4, 0);
}
void MipsTargetELFStreamer::emitDTPRel64Value(const MCExpr *Value) {
auto &S = getStreamer();
+ S.ensureHeadroom(8);
S.addFixup(Value, Mips::fixup_Mips_DTPREL64);
S.appendContents(8, 0);
}
void MipsTargetELFStreamer::emitTPRel32Value(const MCExpr *Value) {
auto &S = getStreamer();
+ S.ensureHeadroom(4);
S.addFixup(Value, Mips::fixup_Mips_TPREL32);
S.appendContents(4, 0);
}
void MipsTargetELFStreamer::emitTPRel64Value(const MCExpr *Value) {
auto &S = getStreamer();
+ S.ensureHeadroom(8);
S.addFixup(Value, Mips::fixup_Mips_TPREL64);
S.appendContents(8, 0);
}
diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index a2e48ab..4530fc6 100644
--- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -1052,8 +1052,7 @@ void MipsAsmPrinter::EmitFPCallStub(
// __call_stub_fp_xxxx:
//
std::string x = "__call_stub_fp_" + std::string(Symbol);
- MCSymbolELF *Stub =
- cast<MCSymbolELF>(OutContext.getOrCreateSymbol(StringRef(x)));
+ MCSymbol *Stub = OutContext.getOrCreateSymbol(StringRef(x));
TS.emitDirectiveEnt(*Stub);
MCSymbol *MType =
OutContext.getOrCreateSymbol("__call_stub_fp_" + Twine(Symbol));
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 2ae7520..6765ecb 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -151,6 +151,8 @@ class OneUse2<SDPatternOperator operator>
class fpimm_pos_inf<ValueType vt>
: FPImmLeaf<vt, [{ return Imm.isPosInfinity(); }]>;
+class zeroinitializer<ValueType vt> :
+ PatLeaf<(vt (bitconvert (!cast<ValueType>("i" # vt.Size) 0)))>;
// Operands which can hold a Register or an Immediate.
@@ -789,6 +791,23 @@ def UMAX16x2 : I16x2<"max.u", umax>;
def SMIN16x2 : I16x2<"min.s", smin>;
def UMIN16x2 : I16x2<"min.u", umin>;
+let Predicates = [hasPTX<80>, hasSM<90>] in {
+
+ def MIN_RELU_S32 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b),
+ "min.relu.s32",
+ [(set i32:$dst, (smax (smin i32:$a, i32:$b), 0))]>;
+ def MAX_RELU_S32 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b),
+ "max.relu.s32",
+ [(set i32:$dst, (smax (smax i32:$a, i32:$b), 0))]>;
+ def MIN_RELU_S16x2 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b),
+ "min.relu.s16x2",
+ [(set v2i16:$dst, (smax (smin v2i16:$a, v2i16:$b),
+ zeroinitializer<v2i16>))]>;
+ def MAX_RELU_S16x2 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b),
+ "max.relu.s16x2",
+ [(set v2i16:$dst, (smax (smax v2i16:$a, v2i16:$b),
+ zeroinitializer<v2i16>))]>;
+}
//
// Wide multiplication
@@ -2379,9 +2398,6 @@ def fpimm_any_zero : FPImmLeaf<fAny, [{
return Imm.isZero();
}]>;
-def fpimm_positive_zero_v2f16 : PatFrag<(ops), (v2f16 (bitconvert (i32 0)))>;
-def fpimm_positive_zero_v2bf16 : PatFrag<(ops), (v2bf16 (bitconvert (i32 0)))>;
-
// Perform substitution if fma only has one use, and also if instruction has
// nnan instruction flag or if the TM has NoNaNsFPMath
def NVPTX_fma_oneuse_and_nnan : PatFrag<(ops node:$a, node:$b, node:$c),
@@ -2404,10 +2420,10 @@ class FMARELUInst<RegTyInfo t, bit allow_ftz, PatFrag zero_pat>
let Predicates = [useFP16Math, hasPTX<70>, hasSM<80>] in {
def FMARELU_F16 : FMARELUInst<F16RT, true, fpimm_any_zero>;
- def FMARELU_F16X2 : FMARELUInst<F16X2RT, true, fpimm_positive_zero_v2f16>;
+ def FMARELU_F16X2 : FMARELUInst<F16X2RT, true, zeroinitializer<v2f16>>;
}
let Predicates = [hasBF16Math, hasPTX<70>, hasSM<80>] in {
def FMARELU_BF16 : FMARELUInst<BF16RT, false, fpimm_any_zero>;
- def FMARELU_BF16X2 : FMARELUInst<BF16X2RT, false, fpimm_positive_zero_v2bf16>;
+ def FMARELU_BF16X2 : FMARELUInst<BF16X2RT, false, zeroinitializer<v2bf16>>;
}
diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 58766b1..1fc475d 100644
--- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -1756,7 +1756,7 @@ bool PPCAsmParser::parseDirectiveLocalEntry(SMLoc L) {
if (getParser().parseIdentifier(Name))
return Error(L, "expected identifier in '.localentry' directive");
- MCSymbolELF *Sym = cast<MCSymbolELF>(getContext().getOrCreateSymbol(Name));
+ auto *Sym = static_cast<MCSymbolELF *>(getContext().getOrCreateSymbol(Name));
const MCExpr *Expr;
if (parseToken(AsmToken::Comma) ||
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 0e8828f..04b886a 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -13,6 +13,7 @@
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
@@ -93,8 +94,8 @@ public:
MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;
void applyFixup(const MCFragment &, const MCFixup &Fixup,
- const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsResolved) override;
+ const MCValue &Target, uint8_t *Data, uint64_t Value,
+ bool IsResolved) override;
bool shouldForceRelocation(const MCFixup &Fixup, const MCValue &Target) {
// If there is a @ specifier, unless it is optimized out (e.g. constant @l),
@@ -112,14 +113,15 @@ public:
// to resolve the fixup directly. Emit a relocation and leave
// resolution of the final target address to the linker.
if (const auto *A = Target.getAddSym()) {
- if (const auto *S = dyn_cast<MCSymbolELF>(A)) {
+ if (getContext().isELF()) {
// The "other" values are stored in the last 6 bits of the second
// byte. The traditional defines for STO values assume the full byte
// and thus the shift to pack it.
- unsigned Other = S->getOther() << 2;
+ unsigned Other = static_cast<const MCSymbolELF *>(A)->getOther() << 2;
if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0)
return true;
- } else if (const auto *S = dyn_cast<MCSymbolXCOFF>(A)) {
+ } else if (getContext().isXCOFF()) {
+ auto *S = static_cast<const MCSymbolXCOFF *>(A);
return !Target.isAbsolute() && S->isExternal() &&
S->getStorageClass() == XCOFF::C_WEAKEXT;
}
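
The shouldForceRelocation hunk above depends on the PPC64 local-entry encoding: MCSymbolELF keeps only six "other" bits, so the stored value is shifted left by two before being tested against ELF::STO_PPC64_LOCAL_MASK. Here is a tiny standalone illustration of that repacking; the mask value mirrors the ELF ABI definition, everything else is invented.

#include <cstdint>
#include <cstdio>

// ELF ABI value: the PPC64 local-entry offset lives in bits 5-7 of st_other.
constexpr unsigned STO_PPC64_LOCAL_MASK = 7u << 5; // 0xe0

// The stored form keeps only the upper six bits of st_other, shifted down by
// two, so << 2 restores the full-byte layout the mask is defined against.
static bool hasLocalEntryOffset(uint8_t StoredOther) {
  unsigned Other = unsigned(StoredOther) << 2;
  return (Other & STO_PPC64_LOCAL_MASK) != 0;
}

int main() {
  // st_other = 0x60 encodes a non-zero local entry offset; stored form 0x18.
  std::printf("0x18 -> %d, 0x00 -> %d\n", hasLocalEntryOffset(0x18),
              hasLocalEntryOffset(0x00));
  return 0;
}
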
@@ -185,9 +187,8 @@ MCFixupKindInfo PPCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
}
void PPCAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &TargetVal,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &TargetVal, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
// In PPC64 ELFv1, .quad .TOC.@tocbase in the .opd section is expected to
// reference the null symbol.
auto Target = TargetVal;
@@ -205,7 +206,6 @@ void PPCAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
if (!Value)
return; // Doesn't change encoding.
- unsigned Offset = Fixup.getOffset();
unsigned NumBytes = getFixupKindNumBytes(Kind);
// For each byte of the fragment that the fixup touches, mask in the bits
@@ -213,7 +213,7 @@ void PPCAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// bitfields above.
for (unsigned i = 0; i != NumBytes; ++i) {
unsigned Idx = Endian == llvm::endianness::little ? i : (NumBytes - 1 - i);
- Data[Offset + i] |= uint8_t((Value >> (Idx * 8)) & 0xff);
+ Data[i] |= uint8_t((Value >> (Idx * 8)) & 0xff);
}
}
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index a5d3be4..329ad6e 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -86,8 +86,8 @@ unsigned PPCELFObjectWriter::getRelocType(const MCFixup &Fixup,
case PPC::S_TPREL_HIGHEST:
case PPC::S_TPREL_HIGHESTA:
case PPC::S_TPREL_LO:
- if (auto *SA = Target.getAddSym())
- cast<MCSymbolELF>(SA)->setType(ELF::STT_TLS);
+ if (auto *SA = const_cast<MCSymbol *>(Target.getAddSym()))
+ static_cast<MCSymbolELF *>(SA)->setType(ELF::STT_TLS);
break;
default:
break;
@@ -499,7 +499,8 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCValue &V,
// The "other" values are stored in the last 6 bits of the second byte.
// The traditional defines for STO values assume the full byte and thus
// the shift to pack it.
- unsigned Other = cast<MCSymbolELF>(V.getAddSym())->getOther() << 2;
+ unsigned Other =
+ static_cast<const MCSymbolELF *>(V.getAddSym())->getOther() << 2;
return (Other & ELF::STO_PPC64_LOCAL_MASK) != 0;
}
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
index 2dbc31f..132d5a4 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
@@ -65,7 +65,7 @@ void PPCELFStreamer::emitPrefixedInstruction(const MCInst &Inst,
MCFragment *InstructionFragment = getCurrentFragment();
SMLoc InstLoc = Inst.getLoc();
// Check if there was a last label emitted.
- if (LastLabel && !LastLabel->isUnset() && LastLabelLoc.isValid() &&
+ if (LastLabel && LastLabel->isDefined() && LastLabelLoc.isValid() &&
InstLoc.isValid()) {
const SourceMgr *SourceManager = getContext().getSourceManager();
unsigned InstLine = SourceManager->FindLineNumber(InstLoc);
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 3dad0e8..d856c3f 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -211,7 +211,7 @@ public:
: PPCTargetStreamer(S), OS(OS) {}
void emitTCEntry(const MCSymbol &S, PPCMCExpr::Specifier Kind) override {
- if (const MCSymbolXCOFF *XSym = dyn_cast<MCSymbolXCOFF>(&S)) {
+ if (getContext().isXCOFF()) {
MCSymbolXCOFF *TCSym =
static_cast<const MCSectionXCOFF *>(Streamer.getCurrentSectionOnly())
->getQualNameSymbol();
@@ -225,10 +225,10 @@ public:
if (Kind == PPC::S_AIX_TLSGD || Kind == PPC::S_AIX_TLSGDM ||
Kind == PPC::S_AIX_TLSIE || Kind == PPC::S_AIX_TLSLE ||
Kind == PPC::S_AIX_TLSLD || Kind == PPC::S_AIX_TLSML)
- OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << "@"
+ OS << "\t.tc " << TCSym->getName() << "," << S.getName() << "@"
<< getContext().getAsmInfo()->getSpecifierName(Kind) << '\n';
else
- OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << '\n';
+ OS << "\t.tc " << TCSym->getName() << "," << S.getName() << '\n';
if (TCSym->hasRename())
Streamer.emitXCOFFRenameDirective(TCSym, TCSym->getSymbolTableName());
@@ -308,7 +308,7 @@ public:
}
void emitAssignment(MCSymbol *S, const MCExpr *Value) override {
- auto *Symbol = cast<MCSymbolELF>(S);
+ auto *Symbol = static_cast<MCSymbolELF *>(S);
// When encoding an assignment to set symbol A to symbol B, also copy
// the st_other bits encoding the local entry point offset.
@@ -335,7 +335,7 @@ private:
auto *Ref = dyn_cast<const MCSymbolRefExpr>(S);
if (!Ref)
return false;
- const auto &RhsSym = cast<MCSymbolELF>(Ref->getSymbol());
+ auto &RhsSym = static_cast<const MCSymbolELF &>(Ref->getSymbol());
unsigned Other = D->getOther();
Other &= ~ELF::STO_PPC64_LOCAL_MASK;
Other |= RhsSym.getOther() & ELF::STO_PPC64_LOCAL_MASK;
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index ce1d51a..2ab2c14 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -2155,7 +2155,8 @@ void PPCLinuxAsmPrinter::emitFunctionBodyStart() {
PPCTargetStreamer *TS =
static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
- TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym), LocalOffsetExp);
+ TS->emitLocalEntry(static_cast<MCSymbolELF *>(CurrentFnSym),
+ LocalOffsetExp);
} else if (Subtarget->isUsingPCRelativeCalls()) {
// When generating the entry point for a function we have a few scenarios
// based on whether or not that function uses R2 and whether or not that
@@ -2182,7 +2183,7 @@ void PPCLinuxAsmPrinter::emitFunctionBodyStart() {
MF->hasInlineAsm() || (!PPCFI->usesTOCBasePtr() && UsesX2OrR2)) {
PPCTargetStreamer *TS =
static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
- TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym),
+ TS->emitLocalEntry(static_cast<MCSymbolELF *>(CurrentFnSym),
MCConstantExpr::create(1, OutContext));
}
}
@@ -2766,7 +2767,7 @@ void PPCAIXAsmPrinter::emitGlobalVariableHelper(const GlobalVariable *GV) {
if (GV->hasComdat())
report_fatal_error("COMDAT not yet supported by AIX.");
- MCSymbolXCOFF *GVSym = cast<MCSymbolXCOFF>(getSymbol(GV));
+ auto *GVSym = static_cast<MCSymbolXCOFF *>(getSymbol(GV));
if (GV->isDeclarationForLinker()) {
emitLinkage(GV, GVSym);
@@ -2859,7 +2860,7 @@ void PPCAIXAsmPrinter::emitFunctionDescriptor() {
MCSectionSubPair Current = OutStreamer->getCurrentSection();
// Emit function descriptor.
OutStreamer->switchSection(
- cast<MCSymbolXCOFF>(CurrentFnDescSym)->getRepresentedCsect());
+ static_cast<MCSymbolXCOFF *>(CurrentFnDescSym)->getRepresentedCsect());
// Emit aliasing label for function descriptor csect.
for (const GlobalAlias *Alias : GOAliasMap[&MF->getFunction()])
@@ -2994,7 +2995,8 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
SmallString<128> Name;
StringRef Prefix = ".";
Name += Prefix;
- Name += cast<MCSymbolXCOFF>(I.first.first)->getSymbolTableName();
+ Name += static_cast<const MCSymbolXCOFF *>(I.first.first)
+ ->getSymbolTableName();
MCSymbol *S = OutContext.getOrCreateSymbol(Name);
TCEntry = static_cast<MCSectionXCOFF *>(
getObjFileLowering().getSectionForTOCEntry(S, TM));
@@ -3112,7 +3114,7 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) {
setCsectAlignment(&G);
std::optional<CodeModel::Model> OptionalCodeModel = G.getCodeModel();
if (OptionalCodeModel)
- setOptionalCodeModel(cast<MCSymbolXCOFF>(getSymbol(&G)),
+ setOptionalCodeModel(static_cast<MCSymbolXCOFF *>(getSymbol(&G)),
*OptionalCodeModel);
}
@@ -3139,7 +3141,7 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) {
if (GVar) {
std::optional<CodeModel::Model> OptionalCodeModel = GVar->getCodeModel();
if (OptionalCodeModel)
- setOptionalCodeModel(cast<MCSymbolXCOFF>(getSymbol(&Alias)),
+ setOptionalCodeModel(static_cast<MCSymbolXCOFF *>(getSymbol(&Alias)),
*OptionalCodeModel);
}
@@ -3190,8 +3192,8 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) {
case PPC::BL_NOP: {
const MachineOperand &MO = MI->getOperand(0);
if (MO.isSymbol()) {
- MCSymbolXCOFF *S =
- cast<MCSymbolXCOFF>(OutContext.getOrCreateSymbol(MO.getSymbolName()));
+ auto *S = static_cast<MCSymbolXCOFF *>(
+ OutContext.getOrCreateSymbol(MO.getSymbolName()));
ExtSymSDNodeSymbols.insert(S);
}
} break;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f179873..30b5fd6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1433,7 +1433,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// With 32 condition bits, we don't need to sink (and duplicate) compares
// aggressively in CodeGenPrep.
if (Subtarget.useCRBits()) {
- setHasMultipleConditionRegisters();
setJumpIsExpensive();
}
@@ -5540,8 +5539,8 @@ static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
const TargetMachine &TM = Subtarget.getTargetMachine();
const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
- MCSymbolXCOFF *S =
- cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
+ auto *S =
+ static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(GV, TM));
MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
return DAG.getMCSymbol(S, PtrVT);
@@ -19856,3 +19855,7 @@ Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
return Builder.CreateOr(
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}
+
+bool PPCTargetLowering::hasMultipleConditionRegisters(EVT VT) const {
+ return Subtarget.useCRBits();
+}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 124c711..9755f0e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1207,6 +1207,8 @@ namespace llvm {
bool IsVarArg) const;
bool supportsTailCallFor(const CallBase *CB) const;
+ bool hasMultipleConditionRegisters(EVT VT) const override;
+
private:
struct ReuseLoadInfo {
SDValue Ptr;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 9538b20..95ec42f 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -327,19 +327,19 @@ bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F,
bool &WasRelaxed) const {
- MCContext &C = getContext();
-
int64_t LineDelta = F.getDwarfLineDelta();
const MCExpr &AddrDelta = F.getDwarfAddrDelta();
- SmallVector<MCFixup, 1> Fixups;
size_t OldSize = F.getVarSize();
int64_t Value;
+ // If the label difference can be resolved, use the default handling, which
+ // utilizes a shorter special opcode.
+ if (AddrDelta.evaluateAsAbsolute(Value, *Asm))
+ return false;
[[maybe_unused]] bool IsAbsolute =
AddrDelta.evaluateKnownAbsolute(Value, *Asm);
assert(IsAbsolute && "CFA with invalid expression");
- Fixups.clear();
SmallVector<char> Data;
raw_svector_ostream OS(Data);
@@ -349,33 +349,21 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F,
encodeSLEB128(LineDelta, OS);
}
- unsigned Offset;
- std::pair<MCFixupKind, MCFixupKind> Fixup;
-
// According to the DWARF specification, the `DW_LNS_fixed_advance_pc` opcode
// takes a single unsigned half (unencoded) operand. The maximum encodable
// value is therefore 65535. Set a conservative upper bound for relaxation.
+ unsigned PCBytes;
if (Value > 60000) {
- unsigned PtrSize = C.getAsmInfo()->getCodePointerSize();
-
- OS << uint8_t(dwarf::DW_LNS_extended_op);
- encodeULEB128(PtrSize + 1, OS);
-
- OS << uint8_t(dwarf::DW_LNE_set_address);
- Offset = OS.tell();
- assert((PtrSize == 4 || PtrSize == 8) && "Unexpected pointer size");
- Fixup = RISCV::getRelocPairForSize(PtrSize);
- OS.write_zeros(PtrSize);
+ PCBytes = getContext().getAsmInfo()->getCodePointerSize();
+ OS << uint8_t(dwarf::DW_LNS_extended_op) << uint8_t(PCBytes + 1)
+ << uint8_t(dwarf::DW_LNE_set_address);
+ OS.write_zeros(PCBytes);
} else {
+ PCBytes = 2;
OS << uint8_t(dwarf::DW_LNS_fixed_advance_pc);
- Offset = OS.tell();
- Fixup = RISCV::getRelocPairForSize(2);
support::endian::write<uint16_t>(OS, 0, llvm::endianness::little);
}
-
- const MCBinaryExpr &MBE = cast<MCBinaryExpr>(AddrDelta);
- Fixups.push_back(MCFixup::create(Offset, MBE.getLHS(), std::get<0>(Fixup)));
- Fixups.push_back(MCFixup::create(Offset, MBE.getRHS(), std::get<1>(Fixup)));
+ auto Offset = OS.tell() - PCBytes;
if (LineDelta == INT64_MAX) {
OS << uint8_t(dwarf::DW_LNS_extended_op);
@@ -386,7 +374,8 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F,
}
F.setVarContents(Data);
- F.setVarFixups(Fixups);
+ F.setVarFixups({MCFixup::create(Offset, &AddrDelta,
+ MCFixup::getDataKindForSize(PCBytes))});
WasRelaxed = OldSize != Data.size();
return true;
}
@@ -754,7 +743,7 @@ std::optional<bool> RISCVAsmBackend::evaluateFixup(const MCFragment &,
if (!AUIPCTarget.getAddSym())
return false;
- const MCSymbolELF &SA = cast<MCSymbolELF>(*AUIPCTarget.getAddSym());
+ auto &SA = static_cast<const MCSymbolELF &>(*AUIPCTarget.getAddSym());
if (SA.isUndefined())
return false;
@@ -881,9 +870,8 @@ bool RISCVAsmBackend::addReloc(const MCFragment &F, const MCFixup &Fixup,
}
void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
IsResolved = addReloc(F, Fixup, Target, Value, IsResolved);
MCFixupKind Kind = Fixup.getKind();
if (mc::isRelocation(Kind))
@@ -898,15 +886,14 @@ void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// Shift the value into position.
Value <<= Info.TargetOffset;
- unsigned Offset = Fixup.getOffset();
unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8;
-
- assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
for (unsigned i = 0; i != NumBytes; ++i) {
- Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ Data[i] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
index d97d632..adec1ec 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -46,8 +46,7 @@ public:
void maybeAddVendorReloc(const MCFragment &, const MCFixup &);
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
index 9bf7896..2885e3c 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
@@ -55,8 +55,8 @@ unsigned RISCVELFObjectWriter::getRelocType(const MCFixup &Fixup,
case ELF::R_RISCV_TLS_GOT_HI20:
case ELF::R_RISCV_TLS_GD_HI20:
case ELF::R_RISCV_TLSDESC_HI20:
- if (auto *SA = Target.getAddSym())
- cast<MCSymbolELF>(SA)->setType(ELF::STT_TLS);
+ if (auto *SA = const_cast<MCSymbol *>(Target.getAddSym()))
+ static_cast<MCSymbolELF *>(SA)->setType(ELF::STT_TLS);
break;
case ELF::R_RISCV_PLT32:
case ELF::R_RISCV_GOT32_PCREL:
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
index c654fd2b..543c4c5 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
@@ -117,7 +117,7 @@ void RISCVTargetELFStreamer::reset() {
void RISCVTargetELFStreamer::emitDirectiveVariantCC(MCSymbol &Symbol) {
getStreamer().getAssembler().registerSymbol(Symbol);
- cast<MCSymbolELF>(Symbol).setOther(ELF::STO_RISCV_VARIANT_CC);
+ static_cast<MCSymbolELF &>(Symbol).setOther(ELF::STO_RISCV_VARIANT_CC);
}
void RISCVELFStreamer::reset() {
@@ -142,7 +142,8 @@ void RISCVELFStreamer::emitInstructionsMappingSymbol() {
}
void RISCVELFStreamer::emitMappingSymbol(StringRef Name) {
- auto *Symbol = cast<MCSymbolELF>(getContext().createLocalSymbol(Name));
+ auto *Symbol =
+ static_cast<MCSymbolELF *>(getContext().createLocalSymbol(Name));
emitLabel(Symbol);
Symbol->setType(ELF::STT_NOTYPE);
Symbol->setBinding(ELF::STB_LOCAL);
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
index f816561c..98c8738 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
@@ -68,27 +68,6 @@ enum Fixups {
fixup_riscv_invalid,
NumTargetFixupKinds = fixup_riscv_invalid - FirstTargetFixupKind
};
-
-static inline std::pair<MCFixupKind, MCFixupKind>
-getRelocPairForSize(unsigned Size) {
- switch (Size) {
- default:
- llvm_unreachable("unsupported fixup size");
- case 1:
- return std::make_pair(FirstLiteralRelocationKind + ELF::R_RISCV_ADD8,
- FirstLiteralRelocationKind + ELF::R_RISCV_SUB8);
- case 2:
- return std::make_pair(FirstLiteralRelocationKind + ELF::R_RISCV_ADD16,
- FirstLiteralRelocationKind + ELF::R_RISCV_SUB16);
- case 4:
- return std::make_pair(FirstLiteralRelocationKind + ELF::R_RISCV_ADD32,
- FirstLiteralRelocationKind + ELF::R_RISCV_SUB32);
- case 8:
- return std::make_pair(FirstLiteralRelocationKind + ELF::R_RISCV_ADD64,
- FirstLiteralRelocationKind + ELF::R_RISCV_SUB64);
- }
-}
-
} // end namespace llvm::RISCV
#endif
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
index 3655861..f70837e 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
@@ -68,36 +68,30 @@ void RISCVTargetStreamer::emitNoteGnuPropertySection(
const Triple &Triple = Ctx.getTargetTriple();
Align NoteAlign;
+ uint64_t DescSize;
if (Triple.isArch64Bit()) {
NoteAlign = Align(8);
+ DescSize = 16;
} else {
assert(Triple.isArch32Bit());
NoteAlign = Align(4);
+ DescSize = 12;
}
assert(Ctx.getObjectFileType() == MCContext::Environment::IsELF);
MCSection *const NoteSection =
Ctx.getELFSection(".note.gnu.property", ELF::SHT_NOTE, ELF::SHF_ALLOC);
- NoteSection->setAlignment(NoteAlign);
OutStreamer.pushSection();
OutStreamer.switchSection(NoteSection);
// Emit the note header
- OutStreamer.emitIntValue(4, 4); // n_namsz
-
- MCSymbol *const NDescBeginSym = Ctx.createTempSymbol();
- MCSymbol *const NDescEndSym = Ctx.createTempSymbol();
- const MCExpr *const NDescSzExpr =
- MCBinaryExpr::createSub(MCSymbolRefExpr::create(NDescEndSym, Ctx),
- MCSymbolRefExpr::create(NDescBeginSym, Ctx), Ctx);
-
- OutStreamer.emitValue(NDescSzExpr, 4); // n_descsz
+ OutStreamer.emitValueToAlignment(NoteAlign);
+ OutStreamer.emitIntValue(4, 4); // n_namsz
+ OutStreamer.emitIntValue(DescSize, 4); // n_descsz
OutStreamer.emitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4); // n_type
OutStreamer.emitBytes(StringRef("GNU", 4)); // n_name
// Emit n_desc field
- OutStreamer.emitLabel(NDescBeginSym);
- OutStreamer.emitValueToAlignment(NoteAlign);
// Emit the feature_1_and property
OutStreamer.emitIntValue(ELF::GNU_PROPERTY_RISCV_FEATURE_1_AND, 4); // pr_type
@@ -105,7 +99,6 @@ void RISCVTargetStreamer::emitNoteGnuPropertySection(
OutStreamer.emitIntValue(Feature1And, 4); // pr_data
OutStreamer.emitValueToAlignment(NoteAlign); // pr_padding
- OutStreamer.emitLabel(NDescEndSym);
OutStreamer.popSection();
}
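
The streamer change above replaces the begin/end label pair with a hard-coded n_descsz, which works because this note's descriptor always holds exactly one GNU property word plus alignment padding. A small standalone check of the 12 and 16 byte figures, independent of the MC layer and with an invented helper name:

#include <cstdio>

// The n_desc payload is one property record: pr_type (4) + pr_datasz (4) +
// pr_data (4), padded to the note alignment (8 bytes on ELF64, 4 on ELF32).
static unsigned riscvFeatureDescSize(bool Is64Bit) {
  const unsigned PropertyBytes = 4 + 4 + 4;
  const unsigned Align = Is64Bit ? 8 : 4;
  return (PropertyBytes + Align - 1) / Align * Align;
}

int main() {
  std::printf("rv64 n_descsz = %u, rv32 n_descsz = %u\n",
              riscvFeatureDescSize(true), riscvFeatureDescSize(false));
  return 0; // prints 16 and 12, matching DescSize in the patch
}
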
diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
index 82c0d8d..80a48c5 100644
--- a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -167,9 +167,8 @@ static std::pair<Value *, Value *> matchStridedStart(Value *Start,
default:
llvm_unreachable("Unexpected opcode");
case Instruction::Or:
- // TODO: We'd be better off creating disjoint or here, but we don't yet
- // have an IRBuilder API for that.
- [[fallthrough]];
+ Start = Builder.CreateOr(Start, Splat, "", /*IsDisjoint=*/true);
+ break;
case Instruction::Add:
Start = Builder.CreateAdd(Start, Splat);
break;
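
The strided-start change above leans on the identity the removed TODO referred to: an "or" whose operands share no set bits has the same value as their sum, so emitting the or with its disjoint flag keeps the start computation additive. A one-file demonstration of that identity, with arbitrary example values:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // For any pair with no common bits, (a | b) == (a + b).
  uint64_t Base = 0x1000, Splat = 0x7; // disjoint bit patterns
  assert((Base & Splat) == 0);
  std::printf("or: 0x%llx, add: 0x%llx\n", (unsigned long long)(Base | Splat),
              (unsigned long long)(Base + Splat));
  return 0;
}
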
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index adbfbeb..e09e6fb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7012,6 +7012,7 @@ static unsigned getRISCVVLOp(SDValue Op) {
OP_CASE(FDIV)
OP_CASE(FNEG)
OP_CASE(FABS)
+ OP_CASE(FCOPYSIGN)
OP_CASE(FSQRT)
OP_CASE(SMIN)
OP_CASE(SMAX)
@@ -7079,6 +7080,15 @@ static unsigned getRISCVVLOp(SDValue Op) {
if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
return RISCVISD::VMXOR_VL;
return RISCVISD::XOR_VL;
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ return RISCVISD::VZEXT_VL;
+ case ISD::SIGN_EXTEND:
+ return RISCVISD::VSEXT_VL;
+ case ISD::SETCC:
+ return RISCVISD::SETCC_VL;
+ case ISD::VSELECT:
+ return RISCVISD::VMERGE_VL;
case ISD::VP_SELECT:
case ISD::VP_MERGE:
return RISCVISD::VMERGE_VL;
@@ -7419,12 +7429,16 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
if (Op.getOperand(0).getValueType().isVector() &&
Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
- return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
+ if (Op.getValueType().isScalableVector())
+ return Op;
+ return lowerToScalableOp(Op, DAG);
case ISD::SIGN_EXTEND:
if (Op.getOperand(0).getValueType().isVector() &&
Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
- return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
+ if (Op.getValueType().isScalableVector())
+ return Op;
+ return lowerToScalableOp(Op, DAG);
case ISD::SPLAT_VECTOR_PARTS:
return lowerSPLAT_VECTOR_PARTS(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
@@ -8166,7 +8180,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
return SplitVectorOp(Op, DAG);
- return lowerFixedLengthVectorSetccToRVV(Op, DAG);
+ return lowerToScalableOp(Op, DAG);
}
case ISD::ADD:
case ISD::SUB:
@@ -8182,6 +8196,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::UREM:
case ISD::BSWAP:
case ISD::CTPOP:
+ case ISD::VSELECT:
return lowerToScalableOp(Op, DAG);
case ISD::SHL:
case ISD::SRA:
@@ -8250,14 +8265,12 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerToScalableOp(Op, DAG);
assert(Op.getOpcode() != ISD::CTTZ);
return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
- case ISD::VSELECT:
- return lowerFixedLengthVectorSelectToRVV(Op, DAG);
case ISD::FCOPYSIGN:
if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
return lowerFCOPYSIGN(Op, DAG, Subtarget);
if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitVectorOp(Op, DAG);
- return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
+ return lowerToScalableOp(Op, DAG);
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
@@ -9694,33 +9707,6 @@ SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
}
-SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
- SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
- MVT ExtVT = Op.getSimpleValueType();
- // Only custom-lower extensions from fixed-length vector types.
- if (!ExtVT.isFixedLengthVector())
- return Op;
- MVT VT = Op.getOperand(0).getSimpleValueType();
- // Grab the canonical container type for the extended type. Infer the smaller
- // type from that to ensure the same number of vector elements, as we know
- // the LMUL will be sufficient to hold the smaller type.
- MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
- // Get the extended container type manually to ensure the same number of
- // vector elements between source and dest.
- MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
- ContainerExtVT.getVectorElementCount());
-
- SDValue Op1 =
- convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
-
- SDLoc DL(Op);
- auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
-
- SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
-
- return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
-}
-
// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
@@ -12834,31 +12820,6 @@ SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
return Res;
}
-SDValue
-RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
- SelectionDAG &DAG) const {
- MVT InVT = Op.getOperand(0).getSimpleValueType();
- MVT ContainerVT = getContainerForFixedLengthVector(InVT);
-
- MVT VT = Op.getSimpleValueType();
-
- SDValue Op1 =
- convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
- SDValue Op2 =
- convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
-
- SDLoc DL(Op);
- auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
- DAG, Subtarget);
- MVT MaskVT = getMaskTypeFor(ContainerVT);
-
- SDValue Cmp =
- DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
- {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
-
- return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
-}
-
SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
@@ -12985,51 +12946,6 @@ SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
return Max;
}
-SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
- SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- MVT VT = Op.getSimpleValueType();
- SDValue Mag = Op.getOperand(0);
- SDValue Sign = Op.getOperand(1);
- assert(Mag.getValueType() == Sign.getValueType() &&
- "Can only handle COPYSIGN with matching types.");
-
- MVT ContainerVT = getContainerForFixedLengthVector(VT);
- Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
- Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
-
- auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
-
- SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
- Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
-
- return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
-}
-
-SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
- SDValue Op, SelectionDAG &DAG) const {
- MVT VT = Op.getSimpleValueType();
- MVT ContainerVT = getContainerForFixedLengthVector(VT);
-
- MVT I1ContainerVT =
- MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
-
- SDValue CC =
- convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
- SDValue Op1 =
- convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
- SDValue Op2 =
- convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
-
- SDLoc DL(Op);
- SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
-
- SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
- Op2, DAG.getUNDEF(ContainerVT), VL);
-
- return convertFromScalableVector(VT, Select, DAG, Subtarget);
-}
-
SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
SelectionDAG &DAG) const {
const auto &TSInfo =
@@ -13056,7 +12972,9 @@ SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
// "cast" fixed length vector to a scalable vector.
assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
"Only fixed length vectors are supported!");
- Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
+ MVT VContainerVT = ContainerVT.changeVectorElementType(
+ V.getSimpleValueType().getVectorElementType());
+ Ops.push_back(convertToScalableVector(VContainerVT, V, DAG, Subtarget));
}
SDLoc DL(Op);
@@ -21478,11 +21396,10 @@ bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
// TODO: Add more target nodes.
switch (Op.getOpcode()) {
case RISCVISD::SELECT_CC:
- // Integer select_cc cannot create poison.
- // TODO: What are the FP poison semantics?
- // TODO: This instruction blocks poison from the unselected operand, can
- // we do anything with that?
- return !Op.getValueType().isInteger();
+ // Integer comparisons cannot create poison.
+ assert(Op.getOperand(0).getValueType().isInteger() &&
+ "RISCVISD::SELECT_CC only compares integers");
+ return false;
}
return TargetLowering::canCreateUndefOrPoisonForTargetNode(
Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
@@ -22550,6 +22467,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
constexpr StringLiteral SupportedInterruptKinds[] = {
"machine",
"supervisor",
+ "rnmi",
"qci-nest",
"qci-nonest",
"SiFive-CLIC-preemptible",
@@ -22567,6 +22485,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
reportFatalUsageError(
"'SiFive-CLIC-*' interrupt kinds require XSfmclic extension");
+ if (Kind == "rnmi" && !Subtarget.hasStdExtSmrnmi())
+ reportFatalUsageError("'rnmi' interrupt kind requires Srnmi extension");
const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
if (Kind.starts_with("SiFive-CLIC-preemptible") && TFI->hasFP(MF))
reportFatalUsageError("'SiFive-CLIC-preemptible' interrupt kinds cannot "
@@ -23212,7 +23132,11 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (Kind == "supervisor")
RetOpc = RISCVISD::SRET_GLUE;
- else if (Kind == "qci-nest" || Kind == "qci-nonest") {
+ else if (Kind == "rnmi") {
+ assert(STI.hasFeature(RISCV::FeatureStdExtSmrnmi) &&
+ "Need Smrnmi extension for rnmi");
+ RetOpc = RISCVISD::MNRET_GLUE;
+ } else if (Kind == "qci-nest" || Kind == "qci-nonest") {
assert(STI.hasFeature(RISCV::FeatureVendorXqciint) &&
"Need Xqciint for qci-(no)nest");
RetOpc = RISCVISD::QC_C_MILEAVERET_GLUE;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index ca70c46..fa50e21 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -534,9 +534,6 @@ private:
SDValue lowerMaskedScatter(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op,
- SelectionDAG &DAG) const;
SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const;
@@ -551,8 +548,6 @@ private:
SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPCttzElements(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
- unsigned ExtendOpc) const;
SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGET_FPENV(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 6536078..8bd3830 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -75,6 +75,8 @@ def riscv_sret_glue : RVSDNode<"SRET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
def riscv_mret_glue : RVSDNode<"MRET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
+def riscv_mnret_glue : RVSDNode<"MNRET_GLUE", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
def riscv_mileaveret_glue : RVSDNode<"QC_C_MILEAVERET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
@@ -935,7 +937,6 @@ def MRET : Priv<"mret", 0b0011000>, Sched<[]> {
let rs1 = 0;
let rs2 = 0b00010;
}
-} // isBarrier = 1, isReturn = 1, isTerminator = 1
let Predicates = [HasStdExtSmrnmi] in {
def MNRET : Priv<"mnret", 0b0111000>, Sched<[]> {
@@ -944,6 +945,8 @@ def MNRET : Priv<"mnret", 0b0111000>, Sched<[]> {
let rs2 = 0b00010;
}
}// Predicates = [HasStdExtSmrnmi]
+} // isBarrier = 1, isReturn = 1, isTerminator = 1
+
def WFI : Priv<"wfi", 0b0001000>, Sched<[]> {
let rd = 0;
@@ -1801,6 +1804,8 @@ def : Pat<(riscv_call texternalsym:$func), (PseudoCALL texternalsym:$func)>;
def : Pat<(riscv_sret_glue), (SRET)>;
def : Pat<(riscv_mret_glue), (MRET)>;
+let Predicates = [HasStdExtSmrnmi] in
+def : Pat<(riscv_mnret_glue), (MNRET)>;
let isCall = 1, Defs = [X1] in {
let Predicates = [NoStdExtZicfilp] in
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index 5265613..2c64b0c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -14,6 +14,14 @@
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
+def SDT_SetMultiple : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 3>,
+ SDTCisPtrTy<2>,
+ SDTCisVT<3, XLenVT>]>;
+
+def qc_setwmi : RVSDNode<"QC_SETWMI", SDT_SetMultiple,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
def uimm5nonzero : RISCVOp<XLenVT>,
ImmLeaf<XLenVT, [{return (Imm != 0) && isUInt<5>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<5, "NonZero">;
@@ -27,6 +35,8 @@ def uimm5nonzero : RISCVOp<XLenVT>,
}];
}
+def tuimm5nonzero : TImmLeaf<XLenVT, [{return (Imm != 0) && isUInt<5>(Imm);}]>;
+
def uimm5gt3 : RISCVOp<XLenVT>, ImmLeaf<XLenVT,
[{return (Imm > 3) && isUInt<5>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<5, "GT3">;
@@ -92,6 +102,8 @@ def uimm5slist : RISCVOp<XLenVT>, ImmLeaf<XLenVT,
}];
}
+def tuimm7_lsb00 : TImmLeaf<XLenVT,[{return isShiftedUInt<5, 2>(Imm);}]>;
+
def uimm10 : RISCVUImmLeafOp<10>;
def uimm11 : RISCVUImmLeafOp<11>;
@@ -457,6 +469,13 @@ class QCIRVInstRR<bits<5> funct5, DAGOperand InTyRs1, string opcodestr>
: RVInstR<{0b00, funct5}, 0b011, OPC_CUSTOM_0, (outs GPRNoX0:$rd),
(ins InTyRs1:$rs1, GPRNoX0:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+class QCIRVInstRRTied<bits<5> funct5, DAGOperand InTyRs1, string opcodestr>
+ : RVInstR<{0b00, funct5}, 0b011, OPC_CUSTOM_0, (outs GPRNoX0:$rd_wb),
+ (ins GPRNoX0:$rd, InTyRs1:$rs1, GPRNoX0:$rs2), opcodestr,
+ "$rd, $rs1, $rs2"> {
+ let Constraints = "$rd = $rd_wb";
+}
+
class QCIBitManipRII<bits<3> funct3, bits<2> funct2,
DAGOperand InTyRs1, string opcodestr>
: RVInstIBase<funct3, OPC_CUSTOM_0, (outs GPRNoX0:$rd),
@@ -470,11 +489,26 @@ class QCIBitManipRII<bits<3> funct3, bits<2> funct2,
let Inst{24-20} = shamt;
}
+class QCIBitManipRIITied<bits<3> funct3, bits<2> funct2,
+ DAGOperand InTyRs1, string opcodestr>
+ : RVInstIBase<funct3, OPC_CUSTOM_0, (outs GPRNoX0:$rd_wb), (ins GPRNoX0:$rd,
+ InTyRs1:$rs1, uimm5_plus1:$width, uimm5:$shamt),
+ opcodestr, "$rd, $rs1, $width, $shamt"> {
+ let Constraints = "$rd = $rd_wb";
+ bits<5> shamt;
+ bits<5> width;
+
+ let Inst{31-30} = funct2;
+ let Inst{29-25} = width;
+ let Inst{24-20} = shamt;
+}
+
class QCIRVInstRI<bits<1> funct1, DAGOperand InTyImm11,
string opcodestr>
- : RVInstIBase<0b000, OPC_CUSTOM_0, (outs GPRNoX0:$rd),
- (ins GPRNoX0:$rs1, InTyImm11:$imm11), opcodestr,
+ : RVInstIBase<0b000, OPC_CUSTOM_0, (outs GPRNoX0:$rd_wb),
+ (ins GPRNoX0:$rd, GPRNoX0:$rs1, InTyImm11:$imm11), opcodestr,
"$rd, $rs1, $imm11"> {
+ let Constraints = "$rd = $rd_wb";
bits<11> imm11;
let Inst{31-31} = funct1;
@@ -858,12 +892,12 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
let Inst{29-25} = width;
let Inst{24-20} = shamt;
}
- def QC_INSB : QCIBitManipRII<0b001, 0b01, GPR, "qc.insb">;
- def QC_INSBH : QCIBitManipRII<0b001, 0b10, GPR, "qc.insbh">;
- def QC_INSBR : QCIRVInstRR<0b00000, GPR, "qc.insbr">;
- def QC_INSBHR : QCIRVInstRR<0b00001, GPR, "qc.insbhr">;
- def QC_INSBPR : QCIRVInstRR<0b00010, GPR, "qc.insbpr">;
- def QC_INSBPRH : QCIRVInstRR<0b00011, GPR, "qc.insbprh">;
+ def QC_INSB : QCIBitManipRIITied<0b001, 0b01, GPR, "qc.insb">;
+ def QC_INSBH : QCIBitManipRIITied<0b001, 0b10, GPR, "qc.insbh">;
+ def QC_INSBR : QCIRVInstRRTied<0b00000, GPR, "qc.insbr">;
+ def QC_INSBHR : QCIRVInstRRTied<0b00001, GPR, "qc.insbhr">;
+ def QC_INSBPR : QCIRVInstRRTied<0b00010, GPR, "qc.insbpr">;
+ def QC_INSBPRH : QCIRVInstRRTied<0b00011, GPR, "qc.insbprh">;
def QC_EXTU : QCIBitManipRII<0b010, 0b00, GPRNoX0, "qc.extu">;
def QC_EXTDU : QCIBitManipRII<0b010, 0b10, GPRNoX31, "qc.extdu">;
def QC_EXTDUR : QCIRVInstRR<0b00100, GPRNoX31, "qc.extdur">;
@@ -1566,6 +1600,11 @@ def : QCISELECTIICCPat <SETEQ, QC_SELECTIIEQ>;
def : QCISELECTIICCPat <SETNE, QC_SELECTIINE>;
} // Predicates = [HasVendorXqcics, IsRV32]
+let Predicates = [HasVendorXqcilsm, IsRV32] in {
+def : Pat<(qc_setwmi GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7),
+ (QC_SETWMI GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7)>;
+} // Predicates = [HasVendorXqcilsm, IsRV32]
+
//===----------------------------------------------------------------------===//
// Compress Instruction tablegen backend.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
index a250ac8..5a5a9ed 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
@@ -206,8 +206,6 @@ let Predicates = [HasStdExtZvksh], RVVConstraint = VS2Constraint in {
//===----------------------------------------------------------------------===//
defvar I32IntegerVectors = !filter(vti, AllIntegerVectors, !eq(vti.SEW, 32));
-defvar I32I64IntegerVectors = !filter(vti, AllIntegerVectors,
- !or(!eq(vti.SEW, 32), !eq(vti.SEW, 64)));
class ZvkI32IntegerVectors<string vd_lmul> {
list<VTypeInfo> vs2_types = !cond(!eq(vd_lmul, "M8") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 32)),
@@ -1126,16 +1124,16 @@ let Predicates = [HasStdExtZvkned] in {
defm : VPatUnaryV_S_NoMaskVectorCrypto<"int_riscv_vaesz", "PseudoVAESZ", I32IntegerVectors>;
} // Predicates = [HasStdExtZvkned]
-let Predicates = [HasStdExtZvknha] in {
+let Predicates = [HasStdExtZvknhaOrZvknhb] in {
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32IntegerVectors>;
- defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32IntegerVectors>;
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CL", I32IntegerVectors>;
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32IntegerVectors, isSEWAware=true>;
} // Predicates = [HasStdExtZvknha]
let Predicates = [HasStdExtZvknhb] in {
- defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32I64IntegerVectors>;
- defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32I64IntegerVectors>;
- defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32I64IntegerVectors, isSEWAware=true>;
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I64IntegerVectors>;
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CL", I64IntegerVectors>;
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I64IntegerVectors, isSEWAware=true>;
} // Predicates = [HasStdExtZvknhb]
let Predicates = [HasStdExtZvksed] in {
diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
index 6ecddad..041dd07 100644
--- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
@@ -7,6 +7,8 @@
//===----------------------------------------------------------------------===//
#include "RISCVSelectionDAGInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#define GET_SDNODE_DESC
#include "RISCVGenSDNodeInfo.inc"
@@ -62,3 +64,94 @@ void RISCVSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
}
#endif
}
+
+SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset(
+ SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo) const {
+ const auto &Subtarget = DAG.getSubtarget<RISCVSubtarget>();
+ // We currently do this only for Xqcilsm
+ if (!Subtarget.hasVendorXqcilsm())
+ return SDValue();
+
+ // Do this only if we know the size at compile time.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+
+ uint64_t NumberOfBytesToWrite = ConstantSize->getZExtValue();
+
+ // Do this only if it is word aligned and we write a multiple of 4 bytes.
+ if (!(Alignment >= 4) || !((NumberOfBytesToWrite & 3) == 0))
+ return SDValue();
+
+ SmallVector<SDValue, 8> OutChains;
+ SDValue SrcValueReplicated = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+ int NumberOfWords = NumberOfBytesToWrite / 4;
+ MachineFunction &MF = DAG.getMachineFunction();
+ auto Volatile =
+ isVolatile ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
+
+ // Helper for constructing the QC_SETWMI instruction
+ auto getSetwmiNode = [&](uint8_t SizeWords, uint8_t OffsetSetwmi) -> SDValue {
+ SDValue Ops[] = {Chain, SrcValueReplicated, Dst,
+ DAG.getTargetConstant(SizeWords, dl, MVT::i32),
+ DAG.getTargetConstant(OffsetSetwmi, dl, MVT::i32)};
+ MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand(
+ DstPtrInfo.getWithOffset(OffsetSetwmi),
+ MachineMemOperand::MOStore | Volatile, SizeWords * 4, Align(4));
+ return DAG.getMemIntrinsicNode(RISCVISD::QC_SETWMI, dl,
+ DAG.getVTList(MVT::Other), Ops, MVT::i32,
+ BaseMemOperand);
+ };
+
+ // If i8 type and constant non-zero value.
+ if ((Src.getValueType() == MVT::i8) && !isNullConstant(Src))
+ // Replicate byte to word by multiplication with 0x01010101.
+ SrcValueReplicated =
+ DAG.getNode(ISD::MUL, dl, MVT::i32, SrcValueReplicated,
+ DAG.getConstant(0x01010101ul, dl, MVT::i32));
+
+ // We limit a QC_SETWMI to 16 words or less to improve interruptibility.
+ // So for 1-16 words we use a single QC_SETWMI:
+ //
+ // QC_SETWMI reg1, N, 0(reg2)
+ //
+ // For 17-32 words we use two QC_SETWMIs, with the first as 16 words and the
+ // second for the remainder:
+ //
+ // QC_SETWMI reg1, 16, 0(reg2)
+ // QC_SETWMI reg1, N, 64(reg2)
+ //
+ // For 33-48 words, we would like to use (16, 16, n), but that means the last
+ // QC_SETWMI needs an offset of 128 which the instruction doesn't support.
+ // So in this case we use a length of 15 for the second instruction and we do
+ // the rest with the third instruction.
+ // This means the maximum inlined number of words is 47 (for now):
+ //
+ // QC_SETWMI R2, R0, 16, 0
+ // QC_SETWMI R2, R0, 15, 64
+ // QC_SETWMI R2, R0, N, 124
+ //
+ // For 48 words or more, fall back to the target-independent memset lowering.
+ if (NumberOfWords >= 48)
+ return SDValue();
+
+ if (NumberOfWords <= 16) {
+ // 1 - 16 words
+ return getSetwmiNode(NumberOfWords, 0);
+ }
+
+ if (NumberOfWords <= 32) {
+ // 17 - 32 words
+ OutChains.push_back(getSetwmiNode(NumberOfWords - 16, 64));
+ OutChains.push_back(getSetwmiNode(16, 0));
+ } else {
+ // 33 - 47 words
+ OutChains.push_back(getSetwmiNode(NumberOfWords - 31, 124));
+ OutChains.push_back(getSetwmiNode(15, 64));
+ OutChains.push_back(getSetwmiNode(16, 0));
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+}
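The chunking rule described in the comment above is easy to check in isolation. Below is a small standalone sketch (not part of the patch; planSetwmi is a hypothetical helper) that reproduces the (words, byte offset) plan the lowering emits, assuming the same limits: at most 16 words per QC_SETWMI and a maximum encodable offset of 124 bytes.

#include <cassert>
#include <utility>
#include <vector>

// Hypothetical helper mirroring the ranges handled above: 1-16 words -> one
// QC_SETWMI, 17-32 -> two, 33-47 -> three, 0 or >= 48 -> empty plan (defer to
// the generic memset expansion). Each entry is (words stored, byte offset).
static std::vector<std::pair<unsigned, unsigned>> planSetwmi(unsigned NumWords) {
  std::vector<std::pair<unsigned, unsigned>> Plan;
  if (NumWords == 0 || NumWords >= 48)
    return Plan;
  if (NumWords <= 16) {
    Plan.push_back({NumWords, 0});
  } else if (NumWords <= 32) {
    Plan.push_back({16, 0});
    Plan.push_back({NumWords - 16, 64});
  } else {
    // Offset 128 is not encodable, so the second chunk is limited to 15 words
    // and the third starts at byte offset 124.
    Plan.push_back({16, 0});
    Plan.push_back({15, 64});
    Plan.push_back({NumWords - 31, 124});
  }
  return Plan;
}

int main() {
  // 40 words: expect 16, 15 and 9 words at byte offsets 0, 64 and 124.
  auto Plan = planSetwmi(40);
  assert(Plan.size() == 3);
  unsigned Total = 0;
  for (auto [Words, Offset] : Plan) {
    assert(Words >= 1 && Words <= 16 && Offset <= 124);
    Total += Words;
  }
  assert(Total == 40);
  return 0;
}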
diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h
index 641189f..08c8d11 100644
--- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h
@@ -34,6 +34,12 @@ public:
void verifyTargetNode(const SelectionDAG &DAG,
const SDNode *N) const override;
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, Align Alignment,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo) const override;
+
bool hasPassthruOp(unsigned Opcode) const {
return GenNodeInfo.getDesc(Opcode).TSFlags & RISCVISD::HasPassthruOpMask;
}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index da6ac2f..3f2a83f 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -642,12 +642,6 @@ void RISCVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
OptimizationLevel Level) {
LPM.addPass(LoopIdiomVectorizePass(LoopIdiomVectorizeStyle::Predicated));
});
-
- PB.registerVectorizerEndEPCallback(
- [](FunctionPassManager &FPM, OptimizationLevel Level) {
- if (Level.isOptimizingForSpeed())
- FPM.addPass(createFunctionToLoopPassAdaptor(EVLIndVarSimplifyPass()));
- });
}
yaml::MachineFunctionInfo *
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 0d5eb86..67f924a 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -979,11 +979,11 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) const {
- // The interleaved memory access pass will lower interleaved memory ops (i.e
- // a load and store followed by a specific shuffle) to vlseg/vsseg
- // intrinsics.
- if (!UseMaskForCond && !UseMaskForGaps &&
- Factor <= TLI->getMaxSupportedInterleaveFactor()) {
+ // The interleaved memory access pass will lower (de)interleave ops combined
+ // with an adjacent appropriate memory operation to vlseg/vsseg intrinsics. vlseg/vsseg
+ // only support masking per-iteration (i.e. condition), not per-segment (i.e.
+ // gap).
+ if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
auto *VTy = cast<VectorType>(VecTy);
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VTy);
// Need to make sure type hasn't been scalarized
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index d62d99c..05d504c 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -398,6 +398,10 @@ public:
bool enableInterleavedAccessVectorization() const override { return true; }
+ bool enableMaskedInterleavedAccessVectorization() const override {
+ return ST->hasVInstructions();
+ }
+
unsigned getMinTripCountTailFoldingThreshold() const override;
enum RISCVRegisterClass { GPRRC, FPRRC, VRRC };
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index c946451..37a71e8 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -69,6 +69,7 @@ struct OperandInfo {
// Represent as 1,2,4,8, ... and fractional indicator. This is because
// EMUL can take on values that don't map to RISCVVType::VLMUL values exactly.
// For example, a mask operand can have an EMUL less than MF8.
+ // If nullopt, then EMUL isn't used (i.e. only a single scalar is read).
std::optional<std::pair<unsigned, bool>> EMUL;
unsigned Log2EEW;
@@ -83,12 +84,14 @@ struct OperandInfo {
OperandInfo() = delete;
- static bool EMULAndEEWAreEqual(const OperandInfo &A, const OperandInfo &B) {
- return A.Log2EEW == B.Log2EEW && A.EMUL == B.EMUL;
- }
-
- static bool EEWAreEqual(const OperandInfo &A, const OperandInfo &B) {
- return A.Log2EEW == B.Log2EEW;
+ /// Return true if the EMUL and EEW produced by \p Def are compatible with the
+ /// EMUL and EEW used by \p User.
+ static bool areCompatible(const OperandInfo &Def, const OperandInfo &User) {
+ if (Def.Log2EEW != User.Log2EEW)
+ return false;
+ if (User.EMUL && Def.EMUL != User.EMUL)
+ return false;
+ return true;
}
void print(raw_ostream &OS) const {
@@ -98,7 +101,7 @@ struct OperandInfo {
OS << "f";
OS << EMUL->first;
} else
- OS << "EMUL: unknown\n";
+ OS << "EMUL: none\n";
OS << ", EEW: " << (1 << Log2EEW);
}
};
@@ -1399,13 +1402,7 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
return std::nullopt;
}
- // If the operand is used as a scalar operand, then the EEW must be
- // compatible. Otherwise, the EMUL *and* EEW must be compatible.
- bool IsVectorOpUsedAsScalarOp = isVectorOpUsedAsScalarOp(UserOp);
- if ((IsVectorOpUsedAsScalarOp &&
- !OperandInfo::EEWAreEqual(*ConsumerInfo, *ProducerInfo)) ||
- (!IsVectorOpUsedAsScalarOp &&
- !OperandInfo::EMULAndEEWAreEqual(*ConsumerInfo, *ProducerInfo))) {
+ if (!OperandInfo::areCompatible(*ProducerInfo, *ConsumerInfo)) {
LLVM_DEBUG(
dbgs()
<< " Abort due to incompatible information for EMUL or EEW.\n");
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp
index ef84d43..5710cf2 100644
--- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp
@@ -21,8 +21,7 @@ public:
SPIRVAsmBackend(llvm::endianness Endian) : MCAsmBackend(Endian) {}
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override {}
+ uint8_t *Data, uint64_t Value, bool IsResolved) override {}
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp
index a7f6fbc..64d301e 100644
--- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp
@@ -375,7 +375,7 @@ void SPIRVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (Op.isReg())
O << '%' << (getIDFromRegister(Op.getReg().id()) + 1);
else if (Op.isImm())
- O << formatImm((int64_t)Op.getImm());
+ O << formatImm(Op.getImm());
else if (Op.isDFPImm())
O << formatImm((double)Op.getDFPImm());
else if (Op.isExpr())
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 947b574..2c3e087 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -21,7 +21,9 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsSPIRV.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/TypedPointerType.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <queue>
#include <unordered_set>
@@ -187,6 +189,8 @@ class SPIRVEmitIntrinsics
void applyDemangledPtrArgTypes(IRBuilder<> &B);
+ GetElementPtrInst *simplifyZeroLengthArrayGepInst(GetElementPtrInst *GEP);
+
bool runOnFunction(Function &F);
bool postprocessTypes(Module &M);
bool processFunctionPointers(Module &M);
@@ -1458,6 +1462,24 @@ static void createSaturatedConversionDecoration(Instruction *I,
createDecorationIntrinsic(I, SaturatedConversionNode, B);
}
+static void addSaturatedDecorationToIntrinsic(Instruction *I, IRBuilder<> &B) {
+ if (auto *CI = dyn_cast<CallInst>(I)) {
+ if (Function *Fu = CI->getCalledFunction()) {
+ if (Fu->isIntrinsic()) {
+ const unsigned IntrinsicId = Fu->getIntrinsicID();
+ switch (IntrinsicId) {
+ case Intrinsic::fptosi_sat:
+ case Intrinsic::fptoui_sat:
+ createSaturatedConversionDecoration(I, B);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+}
+
Instruction *SPIRVEmitIntrinsics::visitCallInst(CallInst &Call) {
if (!Call.isInlineAsm())
return &Call;
@@ -2543,6 +2565,30 @@ void SPIRVEmitIntrinsics::applyDemangledPtrArgTypes(IRBuilder<> &B) {
}
}
+GetElementPtrInst *
+SPIRVEmitIntrinsics::simplifyZeroLengthArrayGepInst(GetElementPtrInst *GEP) {
+ // getelementptr [0 x T], P, 0 (zero), I -> getelementptr T, P, I.
+ // If the type is a 0-length array and the first index is 0 (zero), drop both
+ // the 0-length array type and the first index. This is a common pattern in
+ // the IR, e.g. when a zero-length array is used as a placeholder for a
+ // flexible array member or an unbound array.
+ assert(GEP && "GEP is null");
+ Type *SrcTy = GEP->getSourceElementType();
+ SmallVector<Value *, 8> Indices(GEP->indices());
+ ArrayType *ArrTy = dyn_cast<ArrayType>(SrcTy);
+ if (ArrTy && ArrTy->getNumElements() == 0 &&
+ PatternMatch::match(Indices[0], PatternMatch::m_Zero())) {
+ IRBuilder<> Builder(GEP);
+ Indices.erase(Indices.begin());
+ SrcTy = ArrTy->getElementType();
+ Value *NewGEP = Builder.CreateGEP(SrcTy, GEP->getPointerOperand(), Indices,
+ "", GEP->getNoWrapFlags());
+ assert(llvm::isa<GetElementPtrInst>(NewGEP) && "NewGEP should be a GEP");
+ return cast<GetElementPtrInst>(NewGEP);
+ }
+ return nullptr;
+}
+
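The address math behind this GEP rewrite is that indexing a zero-sized outer array with 0 contributes no byte offset, so only the inner index matters. The short standalone sketch below (illustrative only, no LLVM APIs; gepZeroLenArray and gepElement are made-up names) checks that both forms compute the same byte offset.

#include <cassert>
#include <cstdint>

// Byte offset of `getelementptr [0 x T], P, 0, I`: the first index steps over
// whole [0 x T] objects, whose size is 0, so it contributes nothing.
static uint64_t gepZeroLenArray(uint64_t ElemSize, uint64_t I) {
  const uint64_t ZeroLenArraySize = 0 * ElemSize; // sizeof([0 x T]) == 0
  return 0 * ZeroLenArraySize + I * ElemSize;
}

// Byte offset of the simplified form `getelementptr T, P, I`.
static uint64_t gepElement(uint64_t ElemSize, uint64_t I) {
  return I * ElemSize;
}

int main() {
  for (uint64_t ElemSize : {1u, 4u, 16u})
    for (uint64_t I = 0; I < 8; ++I)
      assert(gepZeroLenArray(ElemSize, I) == gepElement(ElemSize, I));
  return 0;
}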
bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
if (Func.isDeclaration())
return false;
@@ -2560,14 +2606,30 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
AggrConstTypes.clear();
AggrStores.clear();
- // fix GEP result types ahead of inference
+ // Fix GEP result types ahead of inference, and simplify if possible.
+ // Data structure for dead instructions that were simplified and replaced.
+ SmallPtrSet<Instruction *, 4> DeadInsts;
for (auto &I : instructions(Func)) {
auto *Ref = dyn_cast<GetElementPtrInst>(&I);
if (!Ref || GR->findDeducedElementType(Ref))
continue;
+
+ GetElementPtrInst *NewGEP = simplifyZeroLengthArrayGepInst(Ref);
+ if (NewGEP) {
+ Ref->replaceAllUsesWith(NewGEP);
+ if (isInstructionTriviallyDead(Ref))
+ DeadInsts.insert(Ref);
+
+ Ref = NewGEP;
+ }
if (Type *GepTy = getGEPType(Ref))
GR->addDeducedElementType(Ref, normalizeType(GepTy));
}
+ // Remove dead instructions that were simplified and replaced.
+ for (auto *I : DeadInsts) {
+ assert(I->use_empty() && "Dead instruction should not have any uses left");
+ I->eraseFromParent();
+ }
processParamTypesByFunHeader(CurrF, B);
@@ -2640,6 +2702,7 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
if (isConvergenceIntrinsic(I))
continue;
+ addSaturatedDecorationToIntrinsic(I, B);
processInstrAfterVisit(I, B);
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 83fccdc..960eb2e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -828,6 +828,8 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeArray(uint32_t NumElems,
"Invalid array element type");
SPIRVType *SpvTypeInt32 = getOrCreateSPIRVIntegerType(32, MIRBuilder);
SPIRVType *ArrayType = nullptr;
+ const SPIRVSubtarget &ST =
+ cast<SPIRVSubtarget>(MIRBuilder.getMF().getSubtarget());
if (NumElems != 0) {
Register NumElementsVReg =
buildConstantInt(NumElems, MIRBuilder, SpvTypeInt32, EmitIR);
@@ -838,6 +840,8 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeArray(uint32_t NumElems,
.addUse(NumElementsVReg);
});
} else {
+ if (!ST.isShader())
+ return nullptr;
ArrayType = createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) {
return MIRBuilder.buildInstr(SPIRV::OpTypeRuntimeArray)
.addDef(createTypeVReg(MIRBuilder))
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index d4fa62a..e9f5ffa 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -665,6 +665,11 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg,
case TargetOpcode::G_FPTOUI:
return selectUnOp(ResVReg, ResType, I, SPIRV::OpConvertFToU);
+ case TargetOpcode::G_FPTOSI_SAT:
+ return selectUnOp(ResVReg, ResType, I, SPIRV::OpConvertFToS);
+ case TargetOpcode::G_FPTOUI_SAT:
+ return selectUnOp(ResVReg, ResType, I, SPIRV::OpConvertFToU);
+
case TargetOpcode::G_SITOFP:
return selectIToF(ResVReg, ResType, I, true, SPIRV::OpConvertSToF);
case TargetOpcode::G_UITOFP:
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
index 1995e0f..170bddd 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -203,6 +203,10 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) {
.legalForCartesianProduct(allIntScalarsAndVectors,
allFloatScalarsAndVectors);
+ getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
+ .legalForCartesianProduct(allIntScalarsAndVectors,
+ allFloatScalarsAndVectors);
+
getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
.legalForCartesianProduct(allFloatScalarsAndVectors,
allScalarsAndVectors);
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index 0cd9d78..ab06fc0 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -744,8 +744,14 @@ void SPIRV::RequirementHandler::checkSatisfiable(
IsSatisfiable = false;
}
+ AvoidCapabilitiesSet AvoidCaps;
+ if (!ST.isShader())
+ AvoidCaps.S.insert(SPIRV::Capability::Shader);
+ else
+ AvoidCaps.S.insert(SPIRV::Capability::Kernel);
+
for (auto Cap : MinimalCaps) {
- if (AvailableCaps.contains(Cap))
+ if (AvailableCaps.contains(Cap) && !AvoidCaps.S.contains(Cap))
continue;
LLVM_DEBUG(dbgs() << "Capability not supported: "
<< getSymbolicOperandMnemonic(
@@ -1865,6 +1871,11 @@ void addInstrRequirements(const MachineInstr &MI,
Reqs.addCapability(SPIRV::Capability::TernaryBitwiseFunctionINTEL);
break;
}
+ case SPIRV::OpCopyMemorySized: {
+ Reqs.addCapability(SPIRV::Capability::Addresses);
+ // TODO: Add UntypedPointersKHR when implemented.
+ break;
+ }
default:
break;
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index ba023af..bc60842 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -127,8 +127,7 @@ public:
std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
bool writeNopData(raw_ostream &OS, uint64_t Count,
const MCSubtargetInfo *STI) const override {
@@ -253,21 +252,19 @@ MCFixupKindInfo SparcAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
}
void SparcAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
if (!IsResolved)
return;
Value = adjustFixupValue(Fixup.getKind(), Value);
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- unsigned Offset = Fixup.getOffset();
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
for (unsigned i = 0; i != NumBytes; ++i) {
unsigned Idx = Endian == llvm::endianness::little ? i : (NumBytes - 1) - i;
- Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
+ Data[Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
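All of the applyFixup signature changes in this patch have the same shape: the backend now receives a pointer already positioned at the fixup location instead of the whole fragment buffer plus an offset, so the masking loop indexes from zero. A minimal standalone sketch of that OR-in loop is below; maskIn is a made-up helper, not the MC API itself.

#include <cassert>
#include <cstdint>

// OR `Value` into `NumBytes` bytes starting at `Data`, which already points
// at the fixup location inside the fragment (no separate offset needed).
static void maskIn(uint8_t *Data, unsigned NumBytes, uint64_t Value,
                   bool LittleEndian) {
  for (unsigned I = 0; I != NumBytes; ++I) {
    unsigned Idx = LittleEndian ? I : (NumBytes - 1) - I;
    Data[Idx] |= uint8_t((Value >> (I * 8)) & 0xff);
  }
}

int main() {
  uint8_t Frag[8] = {0};
  // Patch a 4-byte value at offset 2 of the fragment: the caller does the
  // offset arithmetic once, the backend only sees Frag + 2.
  maskIn(Frag + 2, 4, 0x11223344, /*LittleEndian=*/true);
  assert(Frag[2] == 0x44 && Frag[3] == 0x33 && Frag[4] == 0x22 && Frag[5] == 0x11);

  uint8_t Frag2[4] = {0};
  maskIn(Frag2, 4, 0x11223344, /*LittleEndian=*/false);
  assert(Frag2[0] == 0x11 && Frag2[3] == 0x44);
  return 0;
}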
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
index a95c4ff..d2071c3 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
@@ -58,8 +58,8 @@ unsigned SparcELFObjectWriter::getRelocType(const MCFixup &Fixup,
case ELF::R_SPARC_TLS_IE_ADD:
case ELF::R_SPARC_TLS_LE_HIX22:
case ELF::R_SPARC_TLS_LE_LOX10:
- if (auto *SA = Target.getAddSym())
- cast<MCSymbolELF>(SA)->setType(ELF::STT_TLS);
+ if (auto *SA = const_cast<MCSymbol *>(Target.getAddSym()))
+ static_cast<MCSymbolELF *>(SA)->setType(ELF::STT_TLS);
break;
default:
break;
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZELFObjectWriter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZELFObjectWriter.cpp
index 8b5587a..1bca5c7 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZELFObjectWriter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZELFObjectWriter.cpp
@@ -111,8 +111,8 @@ unsigned SystemZELFObjectWriter::getRelocType(const MCFixup &Fixup,
case SystemZ::S_TLSLD:
case SystemZ::S_TLSLDM:
case SystemZ::S_DTPOFF:
- if (auto *SA = Target.getAddSym())
- cast<MCSymbolELF>(SA)->setType(ELF::STT_TLS);
+ if (auto *SA = const_cast<MCSymbol *>(Target.getAddSym()))
+ static_cast<MCSymbolELF *>(SA)->setType(ELF::STT_TLS);
break;
default:
break;
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index b2cfd04..d692cbe 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -113,8 +113,7 @@ public:
std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
bool writeNopData(raw_ostream &OS, uint64_t Count,
const MCSubtargetInfo *STI) const override;
};
@@ -152,20 +151,18 @@ MCFixupKindInfo SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
}
void SystemZMCAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
if (Target.getSpecifier())
IsResolved = false;
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
MCFixupKind Kind = Fixup.getKind();
if (mc::isRelocation(Kind))
return;
- unsigned Offset = Fixup.getOffset();
unsigned BitSize = getFixupKindInfo(Kind).TargetSize;
unsigned Size = (BitSize + 7) / 8;
- assert(Offset + Size <= F.getSize() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + Size <= F.getSize() && "Invalid fixup offset!");
// Big-endian insertion of Size bytes.
Value = extractBitsForFixup(Kind, Value, Fixup, getContext());
@@ -173,7 +170,7 @@ void SystemZMCAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
Value &= ((uint64_t)1 << BitSize) - 1;
unsigned ShiftValue = (Size * 8) - 8;
for (unsigned I = 0; I != Size; ++I) {
- Data[Offset + I] |= uint8_t(Value >> ShiftValue);
+ Data[I] |= uint8_t(Value >> ShiftValue);
ShiftValue -= 8;
}
}
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index ae6ca55a36..783f86a 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1286,7 +1286,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
if ((Opcode == SystemZ::ALFI && OpNum == 0 &&
isInt<8>((int32_t)MI.getOperand(2).getImm())) ||
(Opcode == SystemZ::ALGFI && OpNum == 0 &&
- isInt<8>((int64_t)MI.getOperand(2).getImm()))) {
+ isInt<8>(MI.getOperand(2).getImm()))) {
// AL(G)FI %reg, CONST -> AL(G)SI %mem, CONST
Opcode = (Opcode == SystemZ::ALFI ? SystemZ::ALSI : SystemZ::ALGSI);
MachineInstr *BuiltMI =
@@ -1301,7 +1301,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
if ((Opcode == SystemZ::SLFI && OpNum == 0 &&
isInt<8>((int32_t)-MI.getOperand(2).getImm())) ||
(Opcode == SystemZ::SLGFI && OpNum == 0 &&
- isInt<8>((int64_t)-MI.getOperand(2).getImm()))) {
+ isInt<8>((-MI.getOperand(2).getImm())))) {
// SL(G)FI %reg, CONST -> AL(G)SI %mem, -CONST
Opcode = (Opcode == SystemZ::SLFI ? SystemZ::ALSI : SystemZ::ALGSI);
MachineInstr *BuiltMI =
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
index b02b6af..c1b9d9f 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
@@ -112,8 +112,7 @@ public:
}
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &,
- MutableArrayRef<char>, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *, uint64_t Value, bool IsResolved) override;
bool mayNeedRelaxation(unsigned Opcode, ArrayRef<MCOperand> Operands,
const MCSubtargetInfo &STI) const override {
@@ -152,7 +151,7 @@ public:
} // end anonymous namespace
void VEAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target, MutableArrayRef<char> Data,
+ const MCValue &Target, uint8_t *Data,
uint64_t Value, bool IsResolved) {
switch (Fixup.getKind()) {
case VE::fixup_ve_tls_gd_hi32:
@@ -173,14 +172,14 @@ void VEAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
Value <<= Info.TargetOffset;
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the bits
// from the fixup value. The Value has been "split up" into the
// appropriate bitfields above.
for (unsigned i = 0; i != NumBytes; ++i) {
unsigned Idx = Endian == llvm::endianness::little ? i : (NumBytes - 1) - i;
- Data[Offset + Idx] |= static_cast<uint8_t>((Value >> (i * 8)) & 0xff);
+ Data[Idx] |= static_cast<uint8_t>((Value >> (i * 8)) & 0xff);
}
}
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp
index 41f31eb..c702064 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp
@@ -44,8 +44,8 @@ unsigned VEELFObjectWriter::getRelocType(const MCFixup &Fixup,
case VE::S_TLS_GD_LO32:
case VE::S_TPOFF_HI32:
case VE::S_TPOFF_LO32:
- if (auto *SA = Target.getAddSym())
- cast<MCSymbolELF>(SA)->setType(ELF::STT_TLS);
+ if (auto *SA = const_cast<MCSymbol *>(Target.getAddSym()))
+ static_cast<MCSymbolELF *>(SA)->setType(ELF::STT_TLS);
break;
default:
break;
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index 6ae69a4..80df4ed 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -212,12 +212,12 @@ static wasm::WasmLimits defaultLimits() {
static MCSymbolWasm *getOrCreateFunctionTableSymbol(MCContext &Ctx,
const StringRef &Name,
bool Is64) {
- MCSymbolWasm *Sym = cast_or_null<MCSymbolWasm>(Ctx.lookupSymbol(Name));
+ auto *Sym = static_cast<MCSymbolWasm *>(Ctx.lookupSymbol(Name));
if (Sym) {
if (!Sym->isFunctionTable())
Ctx.reportError(SMLoc(), "symbol is not a wasm funcref table");
} else {
- Sym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(Name));
+ Sym = static_cast<MCSymbolWasm *>(Ctx.getOrCreateSymbol(Name));
Sym->setFunctionTable(Is64);
// The default function table is synthesized by the linker.
Sym->setUndefined();
@@ -703,7 +703,7 @@ public:
ExpectBlockType = false;
// The "true" here will cause this to be a nameless symbol.
MCSymbol *Sym = Ctx.createTempSymbol("typeindex", true);
- auto *WasmSym = cast<MCSymbolWasm>(Sym);
+ auto *WasmSym = static_cast<MCSymbolWasm *>(Sym);
WasmSym->setSignature(Signature);
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
const MCExpr *Expr =
@@ -949,7 +949,8 @@ public:
return error("Unknown type in .globaltype modifier: ", TypeTok);
}
// Now set this symbol with the correct type.
- auto *WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
+ auto *WasmSym =
+ static_cast<MCSymbolWasm *>(Ctx.getOrCreateSymbol(SymName));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
WasmSym->setGlobalType(wasm::WasmGlobalType{uint8_t(*Type), Mutable});
// And emit the directive again.
@@ -980,7 +981,8 @@ public:
// Now that we have the name and table type, we can actually create the
// symbol
- auto *WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
+ auto *WasmSym =
+ static_cast<MCSymbolWasm *>(Ctx.getOrCreateSymbol(SymName));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TABLE);
if (Is64) {
Limits.Flags |= wasm::WASM_LIMITS_FLAG_IS_64;
@@ -1000,7 +1002,8 @@ public:
auto SymName = expectIdent();
if (SymName.empty())
return ParseStatus::Failure;
- auto *WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
+ auto *WasmSym =
+ static_cast<MCSymbolWasm *>(Ctx.getOrCreateSymbol(SymName));
if (WasmSym->isDefined()) {
// We push 'Function' either when a label is parsed or a .functype
// directive is parsed. The reason it is not easy to do this uniformly
@@ -1042,7 +1045,8 @@ public:
auto ExportName = expectIdent();
if (ExportName.empty())
return ParseStatus::Failure;
- auto *WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
+ auto *WasmSym =
+ static_cast<MCSymbolWasm *>(Ctx.getOrCreateSymbol(SymName));
WasmSym->setExportName(Ctx.allocateString(ExportName));
TOut.emitExportName(WasmSym, ExportName);
return expect(AsmToken::EndOfStatement, "EOL");
@@ -1057,7 +1061,8 @@ public:
auto ImportModule = expectIdent();
if (ImportModule.empty())
return ParseStatus::Failure;
- auto *WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
+ auto *WasmSym =
+ static_cast<MCSymbolWasm *>(Ctx.getOrCreateSymbol(SymName));
WasmSym->setImportModule(Ctx.allocateString(ImportModule));
TOut.emitImportModule(WasmSym, ImportModule);
return expect(AsmToken::EndOfStatement, "EOL");
@@ -1072,7 +1077,8 @@ public:
auto ImportName = expectIdent();
if (ImportName.empty())
return ParseStatus::Failure;
- auto *WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
+ auto *WasmSym =
+ static_cast<MCSymbolWasm *>(Ctx.getOrCreateSymbol(SymName));
WasmSym->setImportName(Ctx.allocateString(ImportName));
TOut.emitImportName(WasmSym, ImportName);
return expect(AsmToken::EndOfStatement, "EOL");
@@ -1082,7 +1088,8 @@ public:
auto SymName = expectIdent();
if (SymName.empty())
return ParseStatus::Failure;
- auto *WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
+ auto *WasmSym =
+ static_cast<MCSymbolWasm *>(Ctx.getOrCreateSymbol(SymName));
auto *Signature = Ctx.createWasmSignature();
if (parseRegTypeList(Signature->Params))
return ParseStatus::Failure;
@@ -1224,7 +1231,7 @@ public:
if (!CWS->isText())
return;
- auto *WasmSym = cast<MCSymbolWasm>(Symbol);
+ auto *WasmSym = static_cast<MCSymbolWasm *>(Symbol);
// Unlike other targets, we don't allow data in text sections (labels
// declared with .type @object).
if (WasmSym->getType() == wasm::WASM_SYMBOL_TYPE_DATA) {
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
index 4a305ab..6943888 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
@@ -258,7 +258,7 @@ bool WebAssemblyAsmTypeCheck::getGlobal(SMLoc ErrorLoc,
const MCSymbolRefExpr *SymRef;
if (getSymRef(ErrorLoc, GlobalOp, SymRef))
return true;
- const auto *WasmSym = cast<MCSymbolWasm>(&SymRef->getSymbol());
+ auto *WasmSym = static_cast<const MCSymbolWasm *>(&SymRef->getSymbol());
switch (WasmSym->getType().value_or(wasm::WASM_SYMBOL_TYPE_DATA)) {
case wasm::WASM_SYMBOL_TYPE_GLOBAL:
Type = static_cast<wasm::ValType>(WasmSym->getGlobalType().Type);
@@ -286,7 +286,7 @@ bool WebAssemblyAsmTypeCheck::getTable(SMLoc ErrorLoc, const MCOperand &TableOp,
const MCSymbolRefExpr *SymRef;
if (getSymRef(ErrorLoc, TableOp, SymRef))
return true;
- const auto *WasmSym = cast<MCSymbolWasm>(&SymRef->getSymbol());
+ auto *WasmSym = static_cast<const MCSymbolWasm *>(&SymRef->getSymbol());
if (WasmSym->getType().value_or(wasm::WASM_SYMBOL_TYPE_DATA) !=
wasm::WASM_SYMBOL_TYPE_TABLE)
return typeError(ErrorLoc, StringRef("symbol ") + WasmSym->getName() +
@@ -302,7 +302,7 @@ bool WebAssemblyAsmTypeCheck::getSignature(SMLoc ErrorLoc,
const MCSymbolRefExpr *SymRef = nullptr;
if (getSymRef(ErrorLoc, SigOp, SymRef))
return true;
- const auto *WasmSym = cast<MCSymbolWasm>(&SymRef->getSymbol());
+ auto *WasmSym = static_cast<const MCSymbolWasm *>(&SymRef->getSymbol());
Sig = WasmSym->getSignature();
if (!Sig || WasmSym->getType() != Type) {
diff --git a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index 0f7b27b..2a398d4 100644
--- a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -237,7 +237,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
} else {
// We don't have access to the signature, so create a symbol without one
MCSymbol *Sym = getContext().createTempSymbol("typeindex", true);
- auto *WasmSym = cast<MCSymbolWasm>(Sym);
+ auto *WasmSym = static_cast<MCSymbolWasm *>(Sym);
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
const MCExpr *Expr = MCSymbolRefExpr::create(
WasmSym, WebAssembly::S_TYPEINDEX, getContext());
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index 84eb15f..eecef31 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -39,7 +39,7 @@ public:
MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value, bool) override;
+ uint8_t *Data, uint64_t Value, bool) override;
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override;
@@ -80,8 +80,7 @@ bool WebAssemblyAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
void WebAssemblyAsmBackend::applyFixup(const MCFragment &F,
const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data,
+ const MCValue &Target, uint8_t *Data,
uint64_t Value, bool IsResolved) {
if (!IsResolved)
Asm->getWriter().recordRelocation(F, Fixup, Target, Value);
@@ -96,13 +95,13 @@ void WebAssemblyAsmBackend::applyFixup(const MCFragment &F,
// Shift the value into position.
Value <<= Info.TargetOffset;
- unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
for (unsigned I = 0; I != NumBytes; ++I)
- Data[Offset + I] |= uint8_t((Value >> (I * 8)) & 0xff);
+ Data[I] |= uint8_t((Value >> (I * 8)) & 0xff);
}
std::unique_ptr<MCObjectTargetWriter>
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
index 2e97215..d8bfed9 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
@@ -380,7 +380,7 @@ void WebAssemblyInstPrinter::printWebAssemblySignatureOperand(const MCInst *MI,
O << WebAssembly::anyTypeToString(Imm);
} else {
auto Expr = cast<MCSymbolRefExpr>(Op.getExpr());
- auto *Sym = cast<MCSymbolWasm>(&Expr->getSymbol());
+ auto *Sym = static_cast<const MCSymbolWasm *>(&Expr->getSymbol());
if (Sym->getSignature()) {
O << WebAssembly::signatureToString(Sym->getSignature());
} else {
@@ -398,10 +398,10 @@ void WebAssemblyInstPrinter::printCatchList(const MCInst *MI, unsigned OpNo,
auto PrintTagOp = [&](const MCOperand &Op) {
const MCSymbolRefExpr *TagExpr = nullptr;
- const MCSymbolWasm *TagSym = nullptr;
+ const MCSymbol *TagSym = nullptr;
if (Op.isExpr()) {
TagExpr = cast<MCSymbolRefExpr>(Op.getExpr());
- TagSym = cast<MCSymbolWasm>(&TagExpr->getSymbol());
+ TagSym = &TagExpr->getSymbol();
O << TagSym->getName() << " ";
} else {
// When instructions are parsed from the disassembler, we have an
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index cbaf10f..7096104 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -107,7 +107,7 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
encodeULEB128(uint32_t(MO.getImm()), OS);
break;
case WebAssembly::OPERAND_I64IMM:
- encodeSLEB128(int64_t(MO.getImm()), OS);
+ encodeSLEB128(MO.getImm(), OS);
break;
case WebAssembly::OPERAND_SIGNATURE:
case WebAssembly::OPERAND_VEC_I8IMM:
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
index 2cf4bec..ffbc7e1 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -66,7 +66,7 @@ static const MCSection *getTargetSection(const MCExpr *Expr) {
unsigned WebAssemblyWasmObjectWriter::getRelocType(
const MCValue &Target, const MCFixup &Fixup,
const MCSectionWasm &FixupSection, bool IsLocRel) const {
- auto &SymA = cast<MCSymbolWasm>(*Target.getAddSym());
+ auto &SymA = static_cast<const MCSymbolWasm &>(*Target.getAddSym());
auto Spec = WebAssembly::Specifier(Target.getSpecifier());
switch (Spec) {
case WebAssembly::S_GOT:
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 1bf070e..db832bc 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -171,10 +171,10 @@ MCSymbolWasm *WebAssemblyAsmPrinter::getMCSymbolForFunction(
WebAssembly::signatureToString(Sig);
report_fatal_error(Twine(Msg));
}
- WasmSym = cast<MCSymbolWasm>(
+ WasmSym = static_cast<MCSymbolWasm *>(
GetExternalSymbolSymbol(getEmscriptenInvokeSymbolName(Sig)));
} else {
- WasmSym = cast<MCSymbolWasm>(getSymbol(F));
+ WasmSym = static_cast<MCSymbolWasm *>(getSymbol(F));
}
return WasmSym;
}
@@ -186,9 +186,7 @@ void WebAssemblyAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
}
assert(!GV->isThreadLocal());
-
- MCSymbolWasm *Sym = cast<MCSymbolWasm>(getSymbol(GV));
-
+ auto *Sym = static_cast<MCSymbolWasm *>(getSymbol(GV));
if (!Sym->getType()) {
SmallVector<MVT, 1> VTs;
Type *GlobalVT = GV->getValueType();
@@ -218,8 +216,7 @@ void WebAssemblyAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
}
MCSymbol *WebAssemblyAsmPrinter::getOrCreateWasmSymbol(StringRef Name) {
- auto *WasmSym = cast<MCSymbolWasm>(GetExternalSymbolSymbol(Name));
-
+ auto *WasmSym = static_cast<MCSymbolWasm *>(GetExternalSymbolSymbol(Name));
// May be called multiple times, so early out.
if (WasmSym->getType())
return WasmSym;
@@ -312,7 +309,7 @@ void WebAssemblyAsmPrinter::emitDecls(const Module &M) {
// not be found here.
MachineModuleInfoWasm &MMIW = MMI->getObjFileInfo<MachineModuleInfoWasm>();
for (StringRef Name : MMIW.MachineSymbolsUsed) {
- auto *WasmSym = cast<MCSymbolWasm>(getOrCreateWasmSymbol(Name));
+ auto *WasmSym = static_cast<MCSymbolWasm *>(getOrCreateWasmSymbol(Name));
if (WasmSym->isFunction()) {
// TODO(wvo): is there any case where this overlaps with the call to
// emitFunctionType in the loop below?
@@ -324,7 +321,7 @@ void WebAssemblyAsmPrinter::emitDecls(const Module &M) {
// Emit .globaltype, .tagtype, or .tabletype declarations for extern
// declarations, i.e. those that have only been declared (but not defined)
// in the current module
- auto Sym = cast_or_null<MCSymbolWasm>(It.getValue().Symbol);
+ auto Sym = static_cast<MCSymbolWasm *>(It.getValue().Symbol);
if (Sym && !Sym->isDefined())
emitSymbolType(Sym);
}
@@ -381,7 +378,7 @@ void WebAssemblyAsmPrinter::emitDecls(const Module &M) {
}
if (F.hasFnAttribute("wasm-export-name")) {
- auto *Sym = cast<MCSymbolWasm>(getSymbol(&F));
+ auto *Sym = static_cast<MCSymbolWasm *>(getSymbol(&F));
StringRef Name = F.getFnAttribute("wasm-export-name").getValueAsString();
Sym->setExportName(OutContext.allocateString(Name));
getTargetStreamer()->emitExportName(Sym, Name);
@@ -581,7 +578,7 @@ void WebAssemblyAsmPrinter::EmitFunctionAttributes(Module &M) {
auto *GV = cast<GlobalVariable>(CS->getOperand(1)->stripPointerCasts());
StringRef AnnotationString;
getConstantStringInfo(GV, AnnotationString);
- auto *Sym = cast<MCSymbolWasm>(getSymbol(F));
+ auto *Sym = static_cast<MCSymbolWasm *>(getSymbol(F));
CustomSections[AnnotationString].push_back(Sym);
}
@@ -618,7 +615,7 @@ void WebAssemblyAsmPrinter::emitFunctionBodyStart() {
computeSignatureVTs(F.getFunctionType(), &F, F, TM, ParamVTs, ResultVTs);
auto Signature = signatureFromMVTs(OutContext, ResultVTs, ParamVTs);
- auto *WasmSym = cast<MCSymbolWasm>(CurrentFnSym);
+ auto *WasmSym = static_cast<MCSymbolWasm *>(CurrentFnSym);
WasmSym->setSignature(Signature);
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index 4613fcb..e48283a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -52,7 +52,7 @@ MCSymbol *
WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
const GlobalValue *Global = MO.getGlobal();
if (!isa<Function>(Global)) {
- auto *WasmSym = cast<MCSymbolWasm>(Printer.getSymbol(Global));
+ auto *WasmSym = static_cast<MCSymbolWasm *>(Printer.getSymbol(Global));
// If the symbol doesn't have an explicit WasmSymbolType yet and the
// GlobalValue is actually a WebAssembly global, then ensure the symbol is a
// WASM_SYMBOL_TYPE_GLOBAL.
@@ -123,7 +123,7 @@ MCOperand WebAssemblyMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Spec, Ctx);
if (MO.getOffset() != 0) {
- const auto *WasmSym = cast<MCSymbolWasm>(Sym);
+ const auto *WasmSym = static_cast<const MCSymbolWasm *>(Sym);
if (TargetFlags == WebAssemblyII::MO_GOT)
report_fatal_error("GOT symbol references do not support offsets");
if (WasmSym->isFunction())
@@ -148,12 +148,12 @@ MCOperand WebAssemblyMCInstLower::lowerTypeIndexOperand(
auto Signature = Ctx.createWasmSignature();
Signature->Returns = std::move(Returns);
Signature->Params = std::move(Params);
- MCSymbol *Sym = Printer.createTempSymbol("typeindex");
- auto *WasmSym = cast<MCSymbolWasm>(Sym);
- WasmSym->setSignature(Signature);
- WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ auto *Sym =
+ static_cast<MCSymbolWasm *>(Printer.createTempSymbol("typeindex"));
+ Sym->setSignature(Signature);
+ Sym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
const MCExpr *Expr =
- MCSymbolRefExpr::create(WasmSym, WebAssembly::S_TYPEINDEX, Ctx);
+ MCSymbolRefExpr::create(Sym, WebAssembly::S_TYPEINDEX, Ctx);
return MCOperand::createExpr(Expr);
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
index 747ef18..42d1271 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -104,13 +104,13 @@ const MachineOperand &WebAssembly::getCalleeOp(const MachineInstr &MI) {
MCSymbolWasm *WebAssembly::getOrCreateFunctionTableSymbol(
MCContext &Ctx, const WebAssemblySubtarget *Subtarget) {
StringRef Name = "__indirect_function_table";
- MCSymbolWasm *Sym = cast_or_null<MCSymbolWasm>(Ctx.lookupSymbol(Name));
+ auto *Sym = static_cast<MCSymbolWasm *>(Ctx.lookupSymbol(Name));
if (Sym) {
if (!Sym->isFunctionTable())
Ctx.reportError(SMLoc(), "symbol is not a wasm funcref table");
} else {
bool is64 = Subtarget && Subtarget->getTargetTriple().isArch64Bit();
- Sym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(Name));
+ Sym = static_cast<MCSymbolWasm *>(Ctx.getOrCreateSymbol(Name));
Sym->setFunctionTable(is64);
// The default function table is synthesized by the linker.
Sym->setUndefined();
@@ -124,12 +124,12 @@ MCSymbolWasm *WebAssembly::getOrCreateFunctionTableSymbol(
MCSymbolWasm *WebAssembly::getOrCreateFuncrefCallTableSymbol(
MCContext &Ctx, const WebAssemblySubtarget *Subtarget) {
StringRef Name = "__funcref_call_table";
- MCSymbolWasm *Sym = cast_or_null<MCSymbolWasm>(Ctx.lookupSymbol(Name));
+ auto *Sym = static_cast<MCSymbolWasm *>(Ctx.lookupSymbol(Name));
if (Sym) {
if (!Sym->isFunctionTable())
Ctx.reportError(SMLoc(), "symbol is not a wasm funcref table");
} else {
- Sym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(Name));
+ Sym = static_cast<MCSymbolWasm *>(Ctx.getOrCreateSymbol(Name));
// Setting Weak ensures only one table is left after linking when multiple
// modules define the table.
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 1efef83..56a4cc3 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -174,8 +174,7 @@ public:
std::optional<bool> evaluateFixup(const MCFragment &, MCFixup &, MCValue &,
uint64_t &) override;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
bool mayNeedRelaxation(unsigned Opcode, ArrayRef<MCOperand> Operands,
const MCSubtargetInfo &STI) const override;
@@ -512,9 +511,8 @@ void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
isFirstMacroFusibleInst(Inst, *MCII))) {
// If we meet an unfused branch or the first instruction in a fusible pair,
// insert a BoundaryAlign fragment.
- PendingBA = OS.getContext().allocFragment<MCBoundaryAlignFragment>(
- AlignBoundary, STI);
- OS.insert(PendingBA);
+ PendingBA =
+ OS.newSpecialFragment<MCBoundaryAlignFragment>(AlignBoundary, STI);
}
}
@@ -676,9 +674,8 @@ std::optional<bool> X86AsmBackend::evaluateFixup(const MCFragment &,
}
void X86AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
// Force relocation when there is a specifier. This might be too conservative
// - GAS doesn't emit a relocation for call local@plt; local:.
if (Target.getSpecifier())
@@ -710,7 +707,7 @@ void X86AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
}
for (unsigned i = 0; i != Size; ++i)
- Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
+ Data[i] = uint8_t(Value >> (i * 8));
}
bool X86AsmBackend::mayNeedRelaxation(unsigned Opcode,
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index 3323b38..ea0abdd 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -349,8 +349,8 @@ unsigned X86ELFObjectWriter::getRelocType(const MCFixup &Fixup,
case X86::S_TLSLDM:
case X86::S_TPOFF:
case X86::S_DTPOFF:
- if (auto *S = Target.getAddSym())
- cast<MCSymbolELF>(S)->setType(ELF::STT_TLS);
+ if (auto *S = const_cast<MCSymbol *>(Target.getAddSym()))
+ static_cast<MCSymbolELF *>(S)->setType(ELF::STT_TLS);
break;
default:
break;
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
index b8e117b..ff27005 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -369,7 +369,7 @@ void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (Op.isReg()) {
printRegName(O, Op.getReg());
} else if (Op.isImm()) {
- markup(O, Markup::Immediate) << formatImm((int64_t)Op.getImm());
+ markup(O, Markup::Immediate) << formatImm(Op.getImm());
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
O << "offset ";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bbbb1d9..f366094 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8279,8 +8279,8 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
SDValue &Opnd0, SDValue &Opnd1,
- unsigned &NumExtracts,
- bool &IsSubAdd) {
+ unsigned &NumExtracts, bool &IsSubAdd,
+ bool &HasAllowContract) {
using namespace SDPatternMatch;
MVT VT = BV->getSimpleValueType(0);
@@ -8292,6 +8292,7 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
SDValue InVec1 = DAG.getUNDEF(VT);
NumExtracts = 0;
+ HasAllowContract = NumElts != 0;
// Odd-numbered elements in the input build vector are obtained from
// adding/subtracting two integer/float elements.
@@ -8350,6 +8351,7 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
// Increment the number of extractions done.
++NumExtracts;
+ HasAllowContract &= Op->getFlags().hasAllowContract();
}
// Ensure we have found an opcode for both parities and that they are
@@ -8393,9 +8395,10 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
/// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
/// FMADDSUB is.
static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget,
- SelectionDAG &DAG,
- SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2,
- unsigned ExpectedUses) {
+ SelectionDAG &DAG, SDValue &Opnd0,
+ SDValue &Opnd1, SDValue &Opnd2,
+ unsigned ExpectedUses,
+ bool AllowSubAddOrAddSubContract) {
if (Opnd0.getOpcode() != ISD::FMUL ||
!Opnd0->hasNUsesOfValue(ExpectedUses, 0) || !Subtarget.hasAnyFMA())
return false;
@@ -8406,7 +8409,8 @@ static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget,
// or MUL + ADDSUB to FMADDSUB.
const TargetOptions &Options = DAG.getTarget().Options;
bool AllowFusion =
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
+ Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ (AllowSubAddOrAddSubContract && Opnd0->getFlags().hasAllowContract());
if (!AllowFusion)
return false;
@@ -8427,15 +8431,17 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
SDValue Opnd0, Opnd1;
unsigned NumExtracts;
bool IsSubAdd;
- if (!isAddSubOrSubAdd(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts,
- IsSubAdd))
+ bool HasAllowContract;
+ if (!isAddSubOrSubAdd(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts, IsSubAdd,
+ HasAllowContract))
return SDValue();
MVT VT = BV->getSimpleValueType(0);
// Try to generate X86ISD::FMADDSUB node here.
SDValue Opnd2;
- if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts)) {
+ if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts,
+ HasAllowContract)) {
unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB;
return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);
}
@@ -9132,11 +9138,17 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue SrcVec, IndicesVec;
+
+ auto PeekThroughFreeze = [](SDValue N) {
+ if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
+ return N->getOperand(0);
+ return N;
+ };
// Check for a match of the permute source vector and permute index elements.
// This is done by checking that the i-th build_vector operand is of the form:
// (extract_elt SrcVec, (extract_elt IndicesVec, i)).
for (unsigned Idx = 0, E = V.getNumOperands(); Idx != E; ++Idx) {
- SDValue Op = V.getOperand(Idx);
+ SDValue Op = PeekThroughFreeze(V.getOperand(Idx));
if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
@@ -23486,7 +23498,6 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, X86::CondCode X86CC,
}
// Try to shrink i64 compares if the input has enough zero bits.
- // TODO: Add sign-bits equivalent for isX86CCSigned(X86CC)?
if (CmpVT == MVT::i64 && !isX86CCSigned(X86CC) &&
Op0.hasOneUse() && // Hacky way to not break CSE opportunities with sub.
DAG.MaskedValueIsZero(Op1, APInt::getHighBitsSet(64, 32)) &&
@@ -23496,6 +23507,16 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, X86::CondCode X86CC,
Op1 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op1);
}
+ // Try to shrink all i64 compares if the inputs are representable as signed
+ // i32.
+ if (CmpVT == MVT::i64 &&
+ Op0.hasOneUse() && // Hacky way to not break CSE opportunities with sub.
+ DAG.ComputeNumSignBits(Op1) > 32 && DAG.ComputeNumSignBits(Op0) > 32) {
+ CmpVT = MVT::i32;
+ Op0 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op0);
+ Op1 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op1);
+ }
+
// 0-x == y --> x+y == 0
// 0-x != y --> x+y != 0
if (Op0.getOpcode() == ISD::SUB && isNullConstant(Op0.getOperand(0)) &&
@@ -43165,7 +43186,7 @@ static bool isAddSubOrSubAddMask(ArrayRef<int> Mask, bool &Op0Even) {
/// the fact that they're unused.
static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1,
- bool &IsSubAdd) {
+ bool &IsSubAdd, bool &HasAllowContract) {
EVT VT = N->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -43216,6 +43237,8 @@ static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
// It's a subadd if the vector in the even parity is an FADD.
IsSubAdd = Op0Even ? V1->getOpcode() == ISD::FADD
: V2->getOpcode() == ISD::FADD;
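+ // Record whether both halves permit contraction; this is consulted when
+ // deciding whether to fuse into FMADDSUB/FMSUBADD.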
+ HasAllowContract =
+ V1->getFlags().hasAllowContract() && V2->getFlags().hasAllowContract();
Opnd0 = LHS;
Opnd1 = RHS;
@@ -43273,14 +43296,17 @@ static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, const SDLoc &DL,
SDValue Opnd0, Opnd1;
bool IsSubAdd;
- if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, IsSubAdd))
+ bool HasAllowContract;
+ if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, IsSubAdd,
+ HasAllowContract))
return SDValue();
MVT VT = N->getSimpleValueType(0);
// Try to generate X86ISD::FMADDSUB node here.
SDValue Opnd2;
- if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2)) {
+ if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2,
+ HasAllowContract)) {
unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB;
return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);
}
@@ -54220,7 +54246,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
}
// Try to form a MULHU or MULHS node by looking for
-// (trunc (srl (mul ext, ext), 16))
+// (trunc (srl (mul ext, ext), >= 16))
// TODO: This is X86 specific because we want to be able to handle wide types
// before type legalization. But we can only do it if the vector will be
// legalized via widening/splitting. Type legalization can't handle promotion
@@ -54245,10 +54271,16 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
// First instruction should be a right shift by 16 of a multiply.
SDValue LHS, RHS;
+ APInt ShiftAmt;
if (!sd_match(Src,
- m_Srl(m_Mul(m_Value(LHS), m_Value(RHS)), m_SpecificInt(16))))
+ m_Srl(m_Mul(m_Value(LHS), m_Value(RHS)), m_ConstInt(ShiftAmt))))
return SDValue();
+ if (ShiftAmt.ult(16) || ShiftAmt.uge(InVT.getScalarSizeInBits()))
+ return SDValue();
+
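+ // Any shift amount beyond 16 is applied as an extra logical shift right on
+ // the truncated MULH result below.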
+ uint64_t AdditionalShift = ShiftAmt.getZExtValue() - 16;
+
// Count leading sign/zero bits on both inputs - if there are enough then
// truncation back to vXi16 will be cheap - either as a pack/shuffle
// sequence or using AVX512 truncations. If the inputs are sext/zext then the
@@ -54286,7 +54318,9 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
InVT.getSizeInBits() / 16);
SDValue Res = DAG.getNode(ISD::MULHU, DL, BCVT, DAG.getBitcast(BCVT, LHS),
DAG.getBitcast(BCVT, RHS));
- return DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getBitcast(InVT, Res));
+ Res = DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getBitcast(InVT, Res));
+ return DAG.getNode(ISD::SRL, DL, VT, Res,
+ DAG.getShiftAmountConstant(AdditionalShift, VT, DL));
}
// Truncate back to source type.
@@ -54294,7 +54328,9 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
RHS = DAG.getNode(ISD::TRUNCATE, DL, VT, RHS);
unsigned Opc = IsSigned ? ISD::MULHS : ISD::MULHU;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ SDValue Res = DAG.getNode(Opc, DL, VT, LHS, RHS);
+ return DAG.getNode(ISD::SRL, DL, VT, Res,
+ DAG.getShiftAmountConstant(AdditionalShift, VT, DL));
}
// Attempt to match PMADDUBSW, which multiplies corresponding unsigned bytes
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index 5862c7e..7c594d0 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -2781,6 +2781,38 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
return Bytes == MFI.getObjectSize(FI);
}
+static bool
+mayBeSRetTailCallCompatible(const TargetLowering::CallLoweringInfo &CLI,
+ Register CallerSRetReg) {
+ const auto &Outs = CLI.Outs;
+ const auto &OutVals = CLI.OutVals;
+
+ // We know the caller has a sret pointer argument (CallerSRetReg). Locate the
+ // operand index within the callee that may have a sret pointer too.
+ unsigned Pos = 0;
+ for (unsigned E = Outs.size(); Pos != E; ++Pos)
+ if (Outs[Pos].Flags.isSRet())
+ break;
+ // Bail out if the callee does not have any sret argument.

+ if (Pos == Outs.size())
+ return false;
+
+ // At this point, either the caller is forwarding its sret argument to the
+ // callee, or the callee is being passed a different sret pointer. We now look
+ // for a CopyToReg, where the callee sret argument is written into a new vreg
+ // (which should later be %rax/%eax, if this is returned).
+ SDValue SRetArgVal = OutVals[Pos];
+ for (SDNode *User : SRetArgVal->users()) {
+ if (User->getOpcode() != ISD::CopyToReg)
+ continue;
+ Register Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (Reg == CallerSRetReg && User->getOperand(2) == SRetArgVal)
+ return true;
+ }
+
+ return false;
+}
+
/// Check whether the call is eligible for tail call optimization. Targets
/// that want to do tail call optimization should implement this function.
/// Note that the x86 backend does not check musttail calls for eligibility! The
@@ -2802,6 +2834,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
// If -tailcallopt is specified, make fastcc functions tail-callable.
MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const Function &CallerF = MF.getFunction();
// If the function return type is x86_fp80 and the callee return type is not,
@@ -2838,14 +2871,15 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
if (RegInfo->hasStackRealignment(MF))
return false;
- // Also avoid sibcall optimization if we're an sret return fn and the callee
- // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
- // insufficient.
- if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
+ // Avoid sibcall optimization if we are an sret return function and the callee
+ // is incompatible, unless we can show that the callee returns our sret
+ // pointer. See the comment in LowerReturn about why hasStructRetAttr is
+ // insufficient.
+ if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
// For a compatible tail call the callee must return our sret pointer. So it
// needs to be (a) an sret function itself and (b) we pass our sret as its
// sret. Condition #b is harder to determine.
- return false;
+ if (!mayBeSRetTailCallCompatible(CLI, SRetReg))
+ return false;
} else if (IsCalleePopSRet)
// The callee pops an sret, so we cannot tail-call, as our caller doesn't
// expect that.
@@ -2967,8 +3001,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
MF.getTarget().Options.GuaranteedTailCallOpt);
- if (unsigned BytesToPop =
- MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
+ if (unsigned BytesToPop = FuncInfo->getBytesToPopOnReturn()) {
// If we have bytes to pop, the callee must pop them.
bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
if (!CalleePopMatches)
diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaAsmBackend.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaAsmBackend.cpp
index 9167794..08936ad 100644
--- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaAsmBackend.cpp
+++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaAsmBackend.cpp
@@ -37,8 +37,7 @@ public:
std::optional<bool> evaluateFixup(const MCFragment &, MCFixup &, MCValue &,
uint64_t &) override;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
bool writeNopData(raw_ostream &OS, uint64_t Count,
const MCSubtargetInfo *STI) const override;
@@ -153,9 +152,8 @@ std::optional<bool> XtensaAsmBackend::evaluateFixup(const MCFragment &F,
}
void XtensaAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
MCContext &Ctx = getContext();
MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
@@ -168,11 +166,10 @@ void XtensaAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
if (!Value)
return; // Doesn't change encoding.
- unsigned Offset = Fixup.getOffset();
unsigned FullSize = getSize(Fixup.getKind());
for (unsigned i = 0; i != FullSize; ++i) {
- Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ Data[i] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
diff --git a/llvm/lib/TargetParser/ARMTargetParser.cpp b/llvm/lib/TargetParser/ARMTargetParser.cpp
index dcb30b7..08944e6 100644
--- a/llvm/lib/TargetParser/ARMTargetParser.cpp
+++ b/llvm/lib/TargetParser/ARMTargetParser.cpp
@@ -535,9 +535,8 @@ void ARM::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values) {
}
}
-StringRef ARM::computeDefaultTargetABI(const Triple &TT, StringRef CPU) {
- StringRef ArchName =
- CPU.empty() ? TT.getArchName() : getArchName(parseCPUArch(CPU));
+StringRef ARM::computeDefaultTargetABI(const Triple &TT) {
+ StringRef ArchName = TT.getArchName();
if (TT.isOSBinFormatMachO()) {
if (TT.getEnvironment() == Triple::EABI ||
@@ -575,10 +574,9 @@ StringRef ARM::computeDefaultTargetABI(const Triple &TT, StringRef CPU) {
}
}
-ARM::ARMABI ARM::computeTargetABI(const Triple &TT, StringRef CPU,
- StringRef ABIName) {
+ARM::ARMABI ARM::computeTargetABI(const Triple &TT, StringRef ABIName) {
if (ABIName.empty())
- ABIName = ARM::computeDefaultTargetABI(TT, CPU);
+ ABIName = ARM::computeDefaultTargetABI(TT);
if (ABIName == "aapcs16")
return ARM_ABI_AAPCS16;
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 126be71..19a16ea 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -379,6 +379,8 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["atomic-fadd-rtn-insts"] = true;
Features["atomic-flat-pk-add-16-insts"] = true;
Features["atomic-global-pk-add-bf16-inst"] = true;
+ Features["bf16-trans-insts"] = true;
+ Features["bf16-cvt-insts"] = true;
Features["bf8-cvt-scale-insts"] = true;
Features["bitop3-insts"] = true;
Features["ci-insts"] = true;
@@ -401,9 +403,10 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["f32-to-f16bf16-cvt-sr-insts"] = true;
Features["fp4-cvt-scale-insts"] = true;
Features["fp6bf6-cvt-scale-insts"] = true;
- Features["fp8-insts"] = true;
+ Features["fp8e5m3-insts"] = true;
Features["fp8-conversion-insts"] = true;
Features["fp8-cvt-scale-insts"] = true;
+ Features["fp8-insts"] = true;
Features["gfx8-insts"] = true;
Features["gfx9-insts"] = true;
Features["gfx90a-insts"] = true;
@@ -413,17 +416,23 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["gfx10-3-insts"] = true;
Features["gfx11-insts"] = true;
Features["gfx12-insts"] = true;
+ Features["gfx1250-insts"] = true;
Features["gws"] = true;
Features["image-insts"] = true;
- Features["s-memrealtime"] = true;
- Features["s-memtime-inst"] = true;
Features["mai-insts"] = true;
Features["permlane16-swap"] = true;
Features["permlane32-swap"] = true;
Features["prng-inst"] = true;
+ Features["setprio-inc-wg-inst"] = true;
+ Features["s-memrealtime"] = true;
+ Features["s-memtime-inst"] = true;
+ Features["tanh-insts"] = true;
+ Features["tensor-cvt-lut-insts"] = true;
+ Features["transpose-load-f4f6-insts"] = true;
+ Features["vmem-pref-insts"] = true;
+ Features["vmem-to-lds-load-insts"] = true;
Features["wavefrontsize32"] = true;
Features["wavefrontsize64"] = true;
- Features["vmem-to-lds-load-insts"] = true;
} else if (T.isAMDGCN()) {
AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
switch (Kind) {
@@ -444,6 +453,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["bitop3-insts"] = true;
Features["prng-inst"] = true;
Features["tanh-insts"] = true;
+ Features["tensor-cvt-lut-insts"] = true;
Features["transpose-load-f4f6-insts"] = true;
Features["bf16-trans-insts"] = true;
Features["bf16-cvt-insts"] = true;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index b268fea..cf94d28 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -113,10 +113,16 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI,
ConstantInt *AndCst) {
if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() ||
- GV->getValueType() != GEP->getSourceElementType() || !GV->isConstant() ||
+ !GV->getValueType()->isArrayTy() || !GV->isConstant() ||
!GV->hasDefinitiveInitializer())
return nullptr;
+ Type *GEPSrcEltTy = GEP->getSourceElementType();
+ if (GEPSrcEltTy->isArrayTy())
+ GEPSrcEltTy = GEPSrcEltTy->getArrayElementType();
+ if (GV->getValueType()->getArrayElementType() != GEPSrcEltTy)
+ return nullptr;
+
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
return nullptr;
@@ -127,12 +133,19 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
return nullptr;
// There are many forms of this optimization we can handle, for now, just do
- // the simple index into a single-dimensional array.
+ // the simple index into a single-dimensional array or elements of equal size.
//
- // Require: GEP GV, 0, i {{, constant indices}}
- if (GEP->getNumOperands() < 3 || !isa<ConstantInt>(GEP->getOperand(1)) ||
- !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
- isa<Constant>(GEP->getOperand(2)))
+ // Require: GEP [n x i8] GV, 0, Idx {{, constant indices}}
+ // Or: GEP i8 GV, Idx
+
+ unsigned GEPIdxOp = 1;
+ if (GEP->getSourceElementType()->isArrayTy()) {
+ GEPIdxOp = 2;
+ if (!match(GEP->getOperand(1), m_ZeroInt()))
+ return nullptr;
+ }
+ if (GEP->getNumOperands() < GEPIdxOp + 1 ||
+ isa<Constant>(GEP->getOperand(GEPIdxOp)))
return nullptr;
// Check that indices after the variable are constants and in-range for the
@@ -141,7 +154,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
SmallVector<unsigned, 4> LaterIndices;
Type *EltTy = Init->getType()->getArrayElementType();
- for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
+ for (unsigned i = GEPIdxOp + 1, e = GEP->getNumOperands(); i != e; ++i) {
ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
if (!Idx)
return nullptr; // Variable index.
@@ -163,7 +176,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
LaterIndices.push_back(IdxVal);
}
- Value *Idx = GEP->getOperand(2);
+ Value *Idx = GEP->getOperand(GEPIdxOp);
// If the index type is non-canonical, wait for it to be canonicalized.
if (Idx->getType() != DL.getIndexType(GEP->getType()))
return nullptr;
@@ -6077,7 +6090,7 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
match(Op1, m_OneUse(m_LShr(m_Value(B), m_APIntAllowPoison(AP2))))) ||
(match(Op0, m_OneUse(m_AShr(m_Value(A), m_APIntAllowPoison(AP1)))) &&
match(Op1, m_OneUse(m_AShr(m_Value(B), m_APIntAllowPoison(AP2)))))) {
- if (AP1 != AP2)
+ if (*AP1 != *AP2)
return nullptr;
unsigned TypeBits = AP1->getBitWidth();
unsigned ShAmt = AP1->getLimitedValue(TypeBits);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 0e3436d..f17fecd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1834,14 +1834,17 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// segfaults which didn't exist in the original program.
APInt DemandedPtrs(APInt::getAllOnes(VWidth)),
DemandedPassThrough(DemandedElts);
- if (auto *CV = dyn_cast<ConstantVector>(II->getOperand(2)))
+ if (auto *CMask = dyn_cast<Constant>(II->getOperand(2))) {
for (unsigned i = 0; i < VWidth; i++) {
- Constant *CElt = CV->getAggregateElement(i);
- if (CElt->isNullValue())
- DemandedPtrs.clearBit(i);
- else if (CElt->isAllOnesValue())
- DemandedPassThrough.clearBit(i);
+ if (Constant *CElt = CMask->getAggregateElement(i)) {
+ if (CElt->isNullValue())
+ DemandedPtrs.clearBit(i);
+ else if (CElt->isAllOnesValue())
+ DemandedPassThrough.clearBit(i);
+ }
}
+ }
+
if (II->getIntrinsicID() == Intrinsic::masked_gather)
simplifyAndSetOp(II, 0, DemandedPtrs, PoisonElts2);
simplifyAndSetOp(II, 3, DemandedPassThrough, PoisonElts3);
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index e87bee7..8da65c5 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1222,9 +1222,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
!ConstantInt::isValueValidForType(IntptrTy, SizeValue))
return;
// Find alloca instruction that corresponds to llvm.lifetime argument.
- AllocaInst *AI = cast<AllocaInst>(II.getArgOperand(1));
+ AllocaInst *AI = dyn_cast<AllocaInst>(II.getArgOperand(1));
// We're interested only in allocas we can handle.
- if (!ASan.isInterestingAlloca(*AI))
+ if (!AI || !ASan.isInterestingAlloca(*AI))
return;
bool DoPoison = (ID == Intrinsic::lifetime_end);
AllocaPoisonCall APC = {&II, AI, SizeValue, DoPoison};
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 54d9a83..7d3c940 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3301,8 +3301,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void handleLifetimeStart(IntrinsicInst &I) {
if (!PoisonStack)
return;
- AllocaInst *AI = cast<AllocaInst>(I.getArgOperand(1));
- LifetimeStartList.push_back(std::make_pair(&I, AI));
+ AllocaInst *AI = dyn_cast<AllocaInst>(I.getArgOperand(1));
+ if (AI)
+ LifetimeStartList.push_back(std::make_pair(&I, AI));
}
void handleBswap(IntrinsicInst &I) {
diff --git a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
index fa1db28..d18c0d0 100644
--- a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
@@ -468,7 +468,8 @@ private:
// Floating-point constants.
Type *Ty = Config.getExtendedFPType(CFP->getType());
return ConstantFP::get(
- Ty, extendConstantFP(CFP->getValueAPF(), Ty->getFltSemantics()));
+ Ty, extendConstantFP(CFP->getValueAPF(),
+ Ty->getScalarType()->getFltSemantics()));
}
// Vector, array, or aggregate constants.
if (C->getType()->isVectorTy()) {
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index f6bf09d..7704e49 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -2499,9 +2499,13 @@ void GVNPass::assignBlockRPONumber(Function &F) {
bool GVNPass::replaceOperandsForInBlockEquality(Instruction *Instr) const {
bool Changed = false;
for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
- Value *Operand = Instr->getOperand(OpNum);
- auto It = ReplaceOperandsWithMap.find(Operand);
+ Use &Operand = Instr->getOperandUse(OpNum);
+ auto It = ReplaceOperandsWithMap.find(Operand.get());
if (It != ReplaceOperandsWithMap.end()) {
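+ // Even when the values are known equal, a pointer operand may only be
+ // replaced in uses that are insensitive to pointer provenance.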
+ const DataLayout &DL = Instr->getDataLayout();
+ if (!canReplacePointersInUseIfEqual(Operand, It->second, DL))
+ continue;
+
LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand << " with "
<< *It->second << " in instruction " << *Instr << '\n');
Instr->setOperand(OpNum, It->second);
@@ -2679,6 +2683,11 @@ bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
Worklist.emplace_back(A, ConstantInt::get(A->getType(), IsKnownTrue));
continue;
}
+
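+ // If the condition is (not A), then A is known to have the inverted value.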
+ if (match(LHS, m_Not(m_Value(A)))) {
+ Worklist.emplace_back(A, ConstantInt::get(A->getType(), !IsKnownTrue));
+ continue;
+ }
}
return Changed;
diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp
index a5fc0b4..1c88532 100644
--- a/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -308,7 +308,7 @@ public:
for (auto &U : I->uses())
op_push_back(U.getUser());
- llvm::sort(op_begin(), op_end());
+ llvm::sort(operands());
}
void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index 6bdf76f..a883998 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -117,7 +117,7 @@ struct StoreToLoadForwardingCandidate {
if (std::abs(StrideLoad) != 1)
return false;
- unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
+ unsigned TypeByteSize = DL.getTypeAllocSize(LoadType);
auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(LoadPtr));
auto *StorePtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(StorePtr));
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 23256cf..03d9f32 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1247,8 +1247,7 @@ private:
"Map index doesn't point back to a slice with this user.");
}
- // Disable SRoA for any intrinsics except for lifetime invariants and
- // invariant group.
+ // Disable SRoA for any intrinsics except for lifetime invariants.
// FIXME: What about debug intrinsics? This matches old behavior, but
// doesn't make sense.
void visitIntrinsicInst(IntrinsicInst &II) {
@@ -1268,12 +1267,6 @@ private:
return;
}
- if (II.isLaunderOrStripInvariantGroup()) {
- insertUse(II, Offset, AllocSize, true);
- enqueueUsers(II);
- return;
- }
-
Base::visitIntrinsicInst(II);
}
@@ -3607,8 +3600,7 @@ private:
}
bool visitIntrinsicInst(IntrinsicInst &II) {
- assert((II.isLifetimeStartOrEnd() || II.isLaunderOrStripInvariantGroup() ||
- II.isDroppable()) &&
+ assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
"Unexpected intrinsic!");
LLVM_DEBUG(dbgs() << " original: " << II << "\n");
@@ -3622,9 +3614,6 @@ private:
return true;
}
- if (II.isLaunderOrStripInvariantGroup())
- return true;
-
assert(II.getArgOperand(1) == OldPtr);
// Lifetime intrinsics are only promotable if they cover the whole alloca.
// Therefore, we drop lifetime intrinsics which don't cover the whole
diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt
index a4fa0e2..e411d68 100644
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -84,6 +84,7 @@ add_llvm_component_library(LLVMTransformUtils
SimplifyLibCalls.cpp
SizeOpts.cpp
SplitModule.cpp
+ SplitModuleByCategory.cpp
StripNonLineTableDebugInfo.cpp
SymbolRewriter.cpp
UnifyFunctionExitNodes.cpp
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index babd7f6..2619e73 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -482,6 +482,9 @@ bool llvm::wouldInstructionBeTriviallyDead(const Instruction *I,
if (II->isLifetimeStartOrEnd()) {
auto *Arg = II->getArgOperand(1);
+ if (isa<PoisonValue>(Arg))
+ return true;
+
// If the only uses of the alloca are lifetime intrinsics, then the
// intrinsics are dead.
return llvm::all_of(Arg->uses(), [](Use &Use) {
@@ -3180,9 +3183,8 @@ void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) {
combineMetadataForCSE(ReplInst, I, false);
}
-template <typename RootType, typename ShouldReplaceFn>
+template <typename ShouldReplaceFn>
static unsigned replaceDominatedUsesWith(Value *From, Value *To,
- const RootType &Root,
const ShouldReplaceFn &ShouldReplace) {
assert(From->getType() == To->getType());
@@ -3191,7 +3193,7 @@ static unsigned replaceDominatedUsesWith(Value *From, Value *To,
auto *II = dyn_cast<IntrinsicInst>(U.getUser());
if (II && II->getIntrinsicID() == Intrinsic::fake_use)
continue;
- if (!ShouldReplace(Root, U))
+ if (!ShouldReplace(U))
continue;
LLVM_DEBUG(dbgs() << "Replace dominated use of '";
From->printAsOperand(dbgs());
@@ -3220,39 +3222,33 @@ unsigned llvm::replaceNonLocalUsesWith(Instruction *From, Value *To) {
unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
DominatorTree &DT,
const BasicBlockEdge &Root) {
- auto Dominates = [&DT](const BasicBlockEdge &Root, const Use &U) {
- return DT.dominates(Root, U);
- };
- return ::replaceDominatedUsesWith(From, To, Root, Dominates);
+ auto Dominates = [&](const Use &U) { return DT.dominates(Root, U); };
+ return ::replaceDominatedUsesWith(From, To, Dominates);
}
unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
DominatorTree &DT,
const BasicBlock *BB) {
- auto Dominates = [&DT](const BasicBlock *BB, const Use &U) {
- return DT.dominates(BB, U);
- };
- return ::replaceDominatedUsesWith(From, To, BB, Dominates);
+ auto Dominates = [&](const Use &U) { return DT.dominates(BB, U); };
+ return ::replaceDominatedUsesWith(From, To, Dominates);
}
unsigned llvm::replaceDominatedUsesWithIf(
Value *From, Value *To, DominatorTree &DT, const BasicBlockEdge &Root,
function_ref<bool(const Use &U, const Value *To)> ShouldReplace) {
- auto DominatesAndShouldReplace =
- [&DT, &ShouldReplace, To](const BasicBlockEdge &Root, const Use &U) {
- return DT.dominates(Root, U) && ShouldReplace(U, To);
- };
- return ::replaceDominatedUsesWith(From, To, Root, DominatesAndShouldReplace);
+ auto DominatesAndShouldReplace = [&](const Use &U) {
+ return DT.dominates(Root, U) && ShouldReplace(U, To);
+ };
+ return ::replaceDominatedUsesWith(From, To, DominatesAndShouldReplace);
}
unsigned llvm::replaceDominatedUsesWithIf(
Value *From, Value *To, DominatorTree &DT, const BasicBlock *BB,
function_ref<bool(const Use &U, const Value *To)> ShouldReplace) {
- auto DominatesAndShouldReplace = [&DT, &ShouldReplace,
- To](const BasicBlock *BB, const Use &U) {
+ auto DominatesAndShouldReplace = [&](const Use &U) {
return DT.dominates(BB, U) && ShouldReplace(U, To);
};
- return ::replaceDominatedUsesWith(From, To, BB, DominatesAndShouldReplace);
+ return ::replaceDominatedUsesWith(From, To, DominatesAndShouldReplace);
}
bool llvm::callsGCLeafFunction(const CallBase *Call,
diff --git a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
index bea76d3..472c03f 100644
--- a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
+++ b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
@@ -155,8 +155,9 @@ void StackInfoBuilder::visit(OptimizationRemarkEmitter &ORE,
return;
}
if (auto *II = dyn_cast<LifetimeIntrinsic>(&Inst)) {
- AllocaInst *AI = cast<AllocaInst>(II->getArgOperand(1));
- if (getAllocaInterestingness(*AI) != AllocaInterestingness::kInteresting)
+ AllocaInst *AI = dyn_cast<AllocaInst>(II->getArgOperand(1));
+ if (!AI ||
+ getAllocaInterestingness(*AI) != AllocaInterestingness::kInteresting)
return;
if (II->getIntrinsicID() == Intrinsic::lifetime_start)
Info.AllocasToInstrument[AI].LifetimeStart.push_back(II);
diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index de9deab..b22ecbc 100644
--- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -721,7 +721,7 @@ PredicateInfo::~PredicateInfo() {
CreatedDeclarations.clear();
for (Function *F : FunctionPtrs) {
- assert(F->user_begin() == F->user_end() &&
+ assert(F->users().empty() &&
"PredicateInfo consumer did not remove all SSA copies.");
F->eraseFromParent();
}
diff --git a/llvm/lib/Transforms/Utils/ProfileVerify.cpp b/llvm/lib/Transforms/Utils/ProfileVerify.cpp
index b972132..d67192f 100644
--- a/llvm/lib/Transforms/Utils/ProfileVerify.cpp
+++ b/llvm/lib/Transforms/Utils/ProfileVerify.cpp
@@ -20,8 +20,12 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
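+// Synthetic entry count assigned by the injector to functions that lack one.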
+static cl::opt<int64_t>
+ DefaultFunctionEntryCount("profcheck-default-function-entry-count",
+ cl::init(1000));
namespace {
class ProfileInjector {
Function &F;
@@ -63,6 +67,19 @@ bool ProfileInjector::inject() {
// will get the same BPI it does if the injector wasn't running.
auto &BPI = FAM.getResult<BranchProbabilityAnalysis>(F);
+ // Inject a function entry count if there is none. It's reasonable for a pass
+ // to clear the MD_prof of a function with a zero entry count. If the original
+ // profile (iFDO or AFDO) is empty for a function, it is simpler to require an
+ // explicit 0-entry count than to mark every branch as cold; we do want some
+ // explicit information, in the spirit of what this verifier aims to achieve:
+ // making dropped or corrupted MD_prof unit-testable.
+ if (!F.getEntryCount(/*AllowSynthetic=*/true))
+ F.setEntryCount(DefaultFunctionEntryCount);
+ // If there is an entry count that's 0, then don't bother injecting. We won't
+ // verify these either.
+ if (F.getEntryCount(/*AllowSynthetic=*/true)->getCount() == 0)
+ return false;
bool Changed = false;
for (auto &BB : F) {
auto *Term = getTerminatorBenefitingFromMDProf(BB);
@@ -119,11 +136,20 @@ PreservedAnalyses ProfileInjectorPass::run(Function &F,
PreservedAnalyses ProfileVerifierPass::run(Function &F,
FunctionAnalysisManager &FAM) {
+ const auto EntryCount = F.getEntryCount(/*AllowSynthetic=*/true);
+ if (!EntryCount) {
+ F.getContext().emitError("Profile verification failed: function entry "
+ "count missing (set to 0 if cold)");
+ return PreservedAnalyses::all();
+ }
+ if (EntryCount->getCount() == 0)
+ return PreservedAnalyses::all();
for (const auto &BB : F)
if (const auto *Term =
ProfileInjector::getTerminatorBenefitingFromMDProf(BB))
if (!Term->getMetadata(LLVMContext::MD_prof))
- F.getContext().emitError("Profile verification failed");
+ F.getContext().emitError(
+ "Profile verification failed: branch annotation missing");
- return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
}
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 674de57..deabacc 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -6641,16 +6641,20 @@ Value *SwitchLookupTable::buildLookup(Value *Index, IRBuilder<> &Builder,
}
case ArrayKind: {
Type *IndexTy = DL.getIndexType(Array->getType());
+ auto *ArrayTy = cast<ArrayType>(Array->getValueType());
- if (Index->getType() != IndexTy)
+ if (Index->getType() != IndexTy) {
+ unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
+ if (auto *Zext = dyn_cast<ZExtInst>(Index))
+ Zext->setNonNeg(
+ isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
+ }
Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
- Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
- GEPIndices, "switch.gep");
- return Builder.CreateLoad(
- cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
- "switch.load");
+ Value *GEP =
+ Builder.CreateInBoundsGEP(ArrayTy, Array, GEPIndices, "switch.gep");
+ return Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
}
}
llvm_unreachable("Unknown lookup table kind!");
diff --git a/llvm/lib/Transforms/Utils/SplitModuleByCategory.cpp b/llvm/lib/Transforms/Utils/SplitModuleByCategory.cpp
new file mode 100644
index 0000000..6b18ece
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/SplitModuleByCategory.cpp
@@ -0,0 +1,323 @@
+//===-------- SplitModuleByCategory.cpp - split a module by categories ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// See comments in the header.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SplitModuleByCategory.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+#include <map>
+#include <string>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "split-module-by-category"
+
+namespace {
+
+// A vector that contains a group of functions with the same category.
+using EntryPointSet = SetVector<const Function *>;
+
+/// Represents a group of functions with one category.
+struct EntryPointGroup {
+ int ID;
+ EntryPointSet Functions;
+
+ EntryPointGroup() = default;
+
+ EntryPointGroup(int ID, EntryPointSet &&Functions = EntryPointSet())
+ : ID(ID), Functions(std::move(Functions)) {}
+
+ void clear() { Functions.clear(); }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const {
+ constexpr size_t INDENT = 4;
+ dbgs().indent(INDENT) << "ENTRY POINTS"
+ << " " << ID << " {\n";
+ for (const Function *F : Functions)
+ dbgs().indent(INDENT) << " " << F->getName() << "\n";
+
+ dbgs().indent(INDENT) << "}\n";
+ }
+#endif
+};
+
+/// Associates an llvm::Module with the information necessary to perform and
+/// track the result of module splitting:
+/// - the group of entry points from the module.
+class ModuleDesc {
+ std::unique_ptr<Module> M;
+ EntryPointGroup EntryPoints;
+
+public:
+ ModuleDesc(std::unique_ptr<Module> M,
+ EntryPointGroup &&EntryPoints = EntryPointGroup())
+ : M(std::move(M)), EntryPoints(std::move(EntryPoints)) {
+ assert(this->M && "Module should be non-null");
+ }
+
+ Module &getModule() { return *M; }
+ const Module &getModule() const { return *M; }
+
+ std::unique_ptr<Module> releaseModule() {
+ EntryPoints.clear();
+ return std::move(M);
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const {
+ dbgs() << "ModuleDesc[" << M->getName() << "] {\n";
+ EntryPoints.dump();
+ dbgs() << "}\n";
+ }
+#endif
+};
+
+bool isKernel(const Function &F) {
+ return F.getCallingConv() == CallingConv::SPIR_KERNEL ||
+ F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
+ F.getCallingConv() == CallingConv::PTX_Kernel;
+}
+
+// Represents "dependency" or "use" graph of global objects (functions and
+// global variables) in a module. It is used during code split to
+// understand which global variables and functions (other than entry points)
+// should be included into a split module.
+//
+// Nodes of the graph represent LLVM's GlobalObjects, edges "A" -> "B" represent
+// the fact that if "A" is included into a module, then "B" should be included
+// as well.
+//
+// Examples of dependencies which are represented in this graph:
+// - Function FA calls function FB
+// - Function FA uses global variable GA
+// - Global variable GA references (initialized with) function FB
+// - Function FA stores address of a function FB somewhere
+//
+// The following cases are treated as dependencies between global objects:
+// 1. Global object A is used by a global object B in any way (store,
+// bitcast, phi node, call, etc.): "A" -> "B" edge will be added to the
+// graph;
+// 2. Function A performs an indirect call of a function with signature S and
+// there is a function B with signature S. "A" -> "B" edge will be added to
+// the graph;
+class DependencyGraph {
+public:
+ using GlobalSet = SmallPtrSet<const GlobalValue *, 16>;
+
+ DependencyGraph(const Module &M) {
+ // Group functions by their signature to handle case (2) described above
+ DenseMap<const FunctionType *, DependencyGraph::GlobalSet>
+ FuncTypeToFuncsMap;
+ for (const Function &F : M.functions()) {
+ // Kernels can't be called (either directly or indirectly).
+ if (isKernel(F))
+ continue;
+
+ FuncTypeToFuncsMap[F.getFunctionType()].insert(&F);
+ }
+
+ for (const Function &F : M.functions()) {
+ // case (1), see comment above the class definition
+ for (const Value *U : F.users())
+ addUserToGraphRecursively(cast<const User>(U), &F);
+
+ // case (2), see comment above the class definition
+ for (const Instruction &I : instructions(F)) {
+ const CallBase *CB = dyn_cast<CallBase>(&I);
+ if (!CB || !CB->isIndirectCall()) // Direct calls were handled above
+ continue;
+
+ const FunctionType *Signature = CB->getFunctionType();
+ GlobalSet &PotentialCallees = FuncTypeToFuncsMap[Signature];
+ Graph[&F].insert(PotentialCallees.begin(), PotentialCallees.end());
+ }
+ }
+
+ // And every global variable (but their handling is a bit simpler)
+ for (const GlobalVariable &GV : M.globals())
+ for (const Value *U : GV.users())
+ addUserToGraphRecursively(cast<const User>(U), &GV);
+ }
+
+ iterator_range<GlobalSet::const_iterator>
+ dependencies(const GlobalValue *Val) const {
+ auto It = Graph.find(Val);
+ return (It == Graph.end())
+ ? make_range(EmptySet.begin(), EmptySet.end())
+ : make_range(It->second.begin(), It->second.end());
+ }
+
+private:
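+ // Walk the transitive users of \p Root with an explicit worklist, adding a
+ // dependency edge to \p V from every function or global variable that
+ // (possibly through constant expressions) ends up using it.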
+ void addUserToGraphRecursively(const User *Root, const GlobalValue *V) {
+ SmallVector<const User *, 8> WorkList;
+ WorkList.push_back(Root);
+
+ while (!WorkList.empty()) {
+ const User *U = WorkList.pop_back_val();
+ if (const auto *I = dyn_cast<const Instruction>(U)) {
+ const Function *UFunc = I->getFunction();
+ Graph[UFunc].insert(V);
+ } else if (isa<const Constant>(U)) {
+ if (const auto *GV = dyn_cast<const GlobalVariable>(U))
+ Graph[GV].insert(V);
+ // This could be a global variable or some constant expression (like
+ // bitcast or gep). We trace users of this constant further to reach
+ // global objects they are used by and add them to the graph.
+ for (const User *UU : U->users())
+ WorkList.push_back(UU);
+ } else {
+ llvm_unreachable("Unhandled type of function user");
+ }
+ }
+ }
+
+ DenseMap<const GlobalValue *, GlobalSet> Graph;
+ SmallPtrSet<const GlobalValue *, 1> EmptySet;
+};
+
+void collectFunctionsAndGlobalVariablesToExtract(
+ SetVector<const GlobalValue *> &GVs, const Module &M,
+ const EntryPointGroup &ModuleEntryPoints, const DependencyGraph &DG) {
+ // We start with module entry points
+ for (const Function *F : ModuleEntryPoints.Functions)
+ GVs.insert(F);
+
+ // Non-discardable global variables are also included in the initial set.
+ for (const GlobalVariable &GV : M.globals())
+ if (!GV.isDiscardableIfUnused())
+ GVs.insert(&GV);
+
+ // GVs is a SetVector: a value is inserted only if it is not already present,
+ // so the worklist below visits each global object at most once.
+ size_t Idx = 0;
+ while (Idx < GVs.size()) {
+ const GlobalValue *Obj = GVs[Idx++];
+
+ for (const GlobalValue *Dep : DG.dependencies(Obj)) {
+ if (const auto *Func = dyn_cast<const Function>(Dep)) {
+ if (!Func->isDeclaration())
+ GVs.insert(Func);
+ } else {
+ GVs.insert(Dep); // Global variables are added unconditionally
+ }
+ }
+ }
+}
+
+ModuleDesc extractSubModule(const Module &M,
+ const SetVector<const GlobalValue *> &GVs,
+ EntryPointGroup &&ModuleEntryPoints) {
+ ValueToValueMapTy VMap;
+ // Clone definitions only for needed globals. Others will be added as
+ // declarations and removed later.
+ std::unique_ptr<Module> SubM = CloneModule(
+ M, VMap, [&](const GlobalValue *GV) { return GVs.contains(GV); });
+ // Replace entry points with cloned ones.
+ EntryPointSet NewEPs;
+ const EntryPointSet &EPs = ModuleEntryPoints.Functions;
+ llvm::for_each(
+ EPs, [&](const Function *F) { NewEPs.insert(cast<Function>(VMap[F])); });
+ ModuleEntryPoints.Functions = std::move(NewEPs);
+ return ModuleDesc{std::move(SubM), std::move(ModuleEntryPoints)};
+}
+
+// Produces a copy of the input LLVM IR module M containing only the entry
+// point functions specified in ModuleEntryPoints, together with the functions
+// and global variables they transitively depend on.
+ModuleDesc extractCallGraph(const Module &M,
+ EntryPointGroup &&ModuleEntryPoints,
+ const DependencyGraph &DG) {
+ SetVector<const GlobalValue *> GVs;
+ collectFunctionsAndGlobalVariablesToExtract(GVs, M, ModuleEntryPoints, DG);
+
+ ModuleDesc SplitM = extractSubModule(M, GVs, std::move(ModuleEntryPoints));
+ LLVM_DEBUG(SplitM.dump());
+ return SplitM;
+}
+
+using EntryPointGroupVec = SmallVector<EntryPointGroup>;
+
+/// Module Splitter.
+/// It takes a module and a collection of entry point groups.
+/// Each group specifies a subset of entry points from the input module that
+/// should be included in one split module.
+class ModuleSplitter {
+private:
+ std::unique_ptr<Module> M;
+ EntryPointGroupVec Groups;
+ DependencyGraph DG;
+
+private:
+ EntryPointGroup drawEntryPointGroup() {
+ assert(Groups.size() > 0 && "Reached end of entry point groups list.");
+ EntryPointGroup Group = std::move(Groups.back());
+ Groups.pop_back();
+ return Group;
+ }
+
+public:
+ ModuleSplitter(std::unique_ptr<Module> Module, EntryPointGroupVec &&GroupVec)
+ : M(std::move(Module)), Groups(std::move(GroupVec)), DG(*M) {
+ assert(!Groups.empty() && "Entry points groups collection is empty!");
+ }
+
+ /// Gets the next group of entry points from the input module and returns a
+ /// split submodule containing those entry points and their dependencies.
+ ModuleDesc getNextSplit() {
+ return extractCallGraph(*M, drawEntryPointGroup(), DG);
+ }
+
+ /// Check whether there are still submodules to split.
+ bool hasMoreSplits() const { return Groups.size() > 0; }
+};
+
+EntryPointGroupVec selectEntryPointGroups(
+ const Module &M, function_ref<std::optional<int>(const Function &F)> EPC) {
+ // std::map is used here to ensure a stable ordering of entry point groups,
+ // which is based on their category IDs; this greatly helps LIT tests.
+ // Note: EPC is allowed to return large identifiers, which is why the
+ // std::map + SmallVector approach is used rather than indexing by ID.
+ std::map<int, EntryPointSet> EntryPointsMap;
+
+ for (const auto &F : M.functions())
+ if (std::optional<int> Category = EPC(F); Category)
+ EntryPointsMap[*Category].insert(&F);
+
+ EntryPointGroupVec Groups;
+ Groups.reserve(EntryPointsMap.size());
+ for (auto &[Key, EntryPoints] : EntryPointsMap)
+ Groups.emplace_back(Key, std::move(EntryPoints));
+
+ return Groups;
+}
+
+} // namespace
+
+void llvm::splitModuleTransitiveFromEntryPoints(
+ std::unique_ptr<Module> M,
+ function_ref<std::optional<int>(const Function &F)> EntryPointCategorizer,
+ function_ref<void(std::unique_ptr<Module> Part)> Callback) {
+ EntryPointGroupVec Groups = selectEntryPointGroups(*M, EntryPointCategorizer);
+ ModuleSplitter Splitter(std::move(M), std::move(Groups));
+ while (Splitter.hasMoreSplits()) {
+ ModuleDesc MD = Splitter.getNextSplit();
+ Callback(MD.releaseModule());
+ }
+}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index ea0fa06..912c893 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -263,6 +263,13 @@ public:
new VPInstruction(VPInstruction::PtrAdd, {Ptr, Offset},
GEPNoWrapFlags::inBounds(), DL, Name));
}
+ VPInstruction *createWidePtrAdd(VPValue *Ptr, VPValue *Offset,
+ DebugLoc DL = DebugLoc::getUnknown(),
+ const Twine &Name = "") {
+ return tryInsertInstruction(
+ new VPInstruction(VPInstruction::WidePtrAdd, {Ptr, Offset},
+ GEPNoWrapFlags::none(), DL, Name));
+ }
VPPhi *createScalarPhi(ArrayRef<VPValue *> IncomingValues, DebugLoc DL,
const Twine &Name = "") {
@@ -486,6 +493,13 @@ public:
/// all profitable VFs in ProfitableVFs.
VectorizationFactor computeBestVF();
+ /// \return The desired interleave count.
+ /// If interleave count has been specified by metadata it will be returned.
+ /// Otherwise, the interleave count is computed and returned. VF and LoopCost
+ /// are the selected vectorization factor and the cost of the selected VF.
+ unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
+ InstructionCost LoopCost);
+
/// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
/// according to the best selected \p VF and \p UF.
///
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b4ea70e..eb0e0fd 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -974,13 +974,6 @@ public:
/// 64 bit loop indices.
std::pair<unsigned, unsigned> getSmallestAndWidestTypes();
- /// \return The desired interleave count.
- /// If interleave count has been specified by metadata it will be returned.
- /// Otherwise, the interleave count is computed and returned. VF and LoopCost
- /// are the selected vectorization factor and the cost of the selected VF.
- unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
- InstructionCost LoopCost);
-
/// Memory access instruction may be vectorized in more than one way.
/// Form of instruction after vectorization depends on cost.
/// This function takes cost-based decisions for Load/Store instructions
@@ -1590,7 +1583,7 @@ private:
/// A type representing the costs for instructions if they were to be
/// scalarized rather than vectorized. The entries are Instruction-Cost
/// pairs.
- using ScalarCostsTy = DenseMap<Instruction *, InstructionCost>;
+ using ScalarCostsTy = MapVector<Instruction *, InstructionCost>;
/// A set containing all BasicBlocks that are known to present after
/// vectorization as a predicated block.
@@ -4653,8 +4646,8 @@ void LoopVectorizationCostModel::collectElementTypesForWidening() {
}
unsigned
-LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
- InstructionCost LoopCost) {
+LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
+ InstructionCost LoopCost) {
// -- The interleave heuristics --
// We interleave the loop in order to expose ILP and reduce the loop overhead.
// There are many micro-architectural considerations that we can't predict
@@ -4669,11 +4662,11 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
// 3. We don't interleave if we think that we will spill registers to memory
// due to the increased register pressure.
- if (!isScalarEpilogueAllowed())
+ if (!CM.isScalarEpilogueAllowed())
return 1;
- // Do not interleave if EVL is preferred and no User IC is specified.
- if (foldTailWithEVL()) {
+ if (any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
+ IsaPred<VPEVLBasedIVPHIRecipe>)) {
LLVM_DEBUG(dbgs() << "LV: Preference for VP intrinsics indicated. "
"Unroll factor forced to be 1.\n");
return 1;
@@ -4686,15 +4679,20 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
// We don't attempt to perform interleaving for loops with uncountable early
// exits because the VPInstruction::AnyOf code cannot currently handle
// multiple parts.
- if (Legal->hasUncountableEarlyExit())
+ if (Plan.hasEarlyExit())
return 1;
- const bool HasReductions = !Legal->getReductionVars().empty();
+ const bool HasReductions =
+ any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
+ IsaPred<VPReductionPHIRecipe>);
// If we did not calculate the cost for VF (because the user selected the VF)
// then we calculate the cost of VF here.
if (LoopCost == 0) {
- LoopCost = expectedCost(VF);
+ if (VF.isScalar())
+ LoopCost = CM.expectedCost(VF);
+ else
+ LoopCost = cost(Plan, VF);
assert(LoopCost.isValid() && "Expected to have chosen a VF with valid cost");
// Loop body is free and there is no need for interleaving.
@@ -4703,7 +4701,7 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
}
VPRegisterUsage R =
- calculateRegisterUsageForPlan(Plan, {VF}, TTI, ValuesToIgnore)[0];
+ calculateRegisterUsageForPlan(Plan, {VF}, TTI, CM.ValuesToIgnore)[0];
// We divide by these constants so assume that we have at least one
// instruction that uses at least one register.
for (auto &Pair : R.MaxLocalUsers) {
@@ -4766,23 +4764,24 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
// Try to get the exact trip count, or an estimate based on profiling data or
// ConstantMax from PSE, failing that.
- auto BestKnownTC = getSmallBestKnownTC(PSE, TheLoop);
+ auto BestKnownTC = getSmallBestKnownTC(PSE, OrigLoop);
// For fixed length VFs treat a scalable trip count as unknown.
if (BestKnownTC && (BestKnownTC->isFixed() || VF.isScalable())) {
// Re-evaluate trip counts and VFs to be in the same numerical space.
- unsigned AvailableTC = estimateElementCount(*BestKnownTC, VScaleForTuning);
- unsigned EstimatedVF = estimateElementCount(VF, VScaleForTuning);
+ unsigned AvailableTC =
+ estimateElementCount(*BestKnownTC, CM.getVScaleForTuning());
+ unsigned EstimatedVF = estimateElementCount(VF, CM.getVScaleForTuning());
// At least one iteration must be scalar when this constraint holds. So the
// maximum available iterations for interleaving is one less.
- if (requiresScalarEpilogue(VF.isVector()))
+ if (CM.requiresScalarEpilogue(VF.isVector()))
--AvailableTC;
unsigned InterleaveCountLB = bit_floor(std::max(
1u, std::min(AvailableTC / (EstimatedVF * 2), MaxInterleaveCount)));
- if (getSmallConstantTripCount(PSE.getSE(), TheLoop).isNonZero()) {
+ if (getSmallConstantTripCount(PSE.getSE(), OrigLoop).isNonZero()) {
// If the best known trip count is exact, we select between two
// prospective ICs, where
//
@@ -4843,7 +4842,7 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
// vectorized the loop we will have done the runtime check and so interleaving
// won't require further checks.
bool ScalarInterleavingRequiresPredication =
- (VF.isScalar() && any_of(TheLoop->blocks(), [this](BasicBlock *BB) {
+ (VF.isScalar() && any_of(OrigLoop->blocks(), [this](BasicBlock *BB) {
return Legal->blockNeedsPredication(BB);
}));
bool ScalarInterleavingRequiresRuntimePointerCheck =
@@ -4866,8 +4865,39 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
// Interleave until store/load ports (estimated by max interleave count) are
// saturated.
- unsigned NumStores = Legal->getNumStores();
- unsigned NumLoads = Legal->getNumLoads();
+ unsigned NumStores = 0;
+ unsigned NumLoads = 0;
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
+ for (VPRecipeBase &R : *VPBB) {
+ if (isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(&R)) {
+ NumLoads++;
+ continue;
+ }
+ if (isa<VPWidenStoreRecipe, VPWidenStoreEVLRecipe>(&R)) {
+ NumStores++;
+ continue;
+ }
+
+ if (auto *InterleaveR = dyn_cast<VPInterleaveRecipe>(&R)) {
+ if (unsigned StoreOps = InterleaveR->getNumStoreOperands())
+ NumStores += StoreOps;
+ else
+ NumLoads += InterleaveR->getNumDefinedValues();
+ continue;
+ }
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
+ NumLoads += isa<LoadInst>(RepR->getUnderlyingInstr());
+ NumStores += isa<StoreInst>(RepR->getUnderlyingInstr());
+ continue;
+ }
+ if (isa<VPHistogramRecipe>(&R)) {
+ NumLoads++;
+ NumStores++;
+ continue;
+ }
+ }
+ }
unsigned StoresIC = IC / (NumStores ? NumStores : 1);
unsigned LoadsIC = IC / (NumLoads ? NumLoads : 1);
@@ -4877,12 +4907,14 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
// do the final reduction after the loop.
bool HasSelectCmpReductions =
HasReductions &&
- any_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool {
- const RecurrenceDescriptor &RdxDesc = Reduction.second;
- RecurKind RK = RdxDesc.getRecurrenceKind();
- return RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
- RecurrenceDescriptor::isFindIVRecurrenceKind(RK);
- });
+ any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
+ [](VPRecipeBase &R) {
+ auto *RedR = dyn_cast<VPReductionPHIRecipe>(&R);
+ return RedR && (RecurrenceDescriptor::isAnyOfRecurrenceKind(
+ RedR->getRecurrenceKind()) ||
+ RecurrenceDescriptor::isFindIVRecurrenceKind(
+ RedR->getRecurrenceKind()));
+ });
if (HasSelectCmpReductions) {
LLVM_DEBUG(dbgs() << "LV: Not interleaving select-cmp reductions.\n");
return 1;
@@ -4893,12 +4925,14 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
// we're interleaving is inside another loop. For tree-wise reductions
// set the limit to 2, and for ordered reductions it's best to disable
// interleaving entirely.
- if (HasReductions && TheLoop->getLoopDepth() > 1) {
+ if (HasReductions && OrigLoop->getLoopDepth() > 1) {
bool HasOrderedReductions =
- any_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool {
- const RecurrenceDescriptor &RdxDesc = Reduction.second;
- return RdxDesc.isOrdered();
- });
+ any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
+ [](VPRecipeBase &R) {
+ auto *RedR = dyn_cast<VPReductionPHIRecipe>(&R);
+
+ return RedR && RedR->isOrdered();
+ });
if (HasOrderedReductions) {
LLVM_DEBUG(
dbgs() << "LV: Not interleaving scalar ordered reductions.\n");
@@ -4992,7 +5026,8 @@ void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) {
if (!isScalarAfterVectorization(&I, VF) && !VF.isScalable() &&
!useEmulatedMaskMemRefHack(&I, VF) &&
computePredInstDiscount(&I, ScalarCosts, VF) >= 0) {
- ScalarCostsVF.insert_range(ScalarCosts);
+ for (const auto &[I, IC] : ScalarCosts)
+ ScalarCostsVF.insert({I, IC});
// Check if we decided to scalarize a call. If so, update the widening
// decision of the call to CM_Scalarize with the computed scalar cost.
for (const auto &[I, Cost] : ScalarCosts) {
@@ -7302,6 +7337,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// Retrieving VectorPH now when it's easier while VPlan still has Regions.
VPBasicBlock *VectorPH = cast<VPBasicBlock>(BestVPlan.getVectorPreheader());
+
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
VPlanTransforms::removeBranchOnConst(BestVPlan);
@@ -7317,6 +7353,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
VPlanTransforms::dissolveLoopRegions(BestVPlan);
// Canonicalize EVL loops after regions are dissolved.
VPlanTransforms::canonicalizeEVLLoops(BestVPlan);
+ VPlanTransforms::materializeBackedgeTakenCount(BestVPlan, VectorPH);
+
// Perform the actual loop transformation.
VPTransformState State(&TTI, BestVF, LI, DT, ILV.AC, ILV.Builder, &BestVPlan,
OrigLoop->getParentLoop(),
@@ -7373,7 +7411,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// 2. Copy and widen instructions from the old loop into the new loop.
BestVPlan.prepareToExecute(
- ILV.getTripCount(),
ILV.getOrCreateVectorTripCount(ILV.LoopVectorPreHeader), State);
replaceVPBBWithIRVPBB(VectorPH, State.CFG.PrevBB);
@@ -10119,7 +10156,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), CM.CostKind);
if (LVP.hasPlanWithVF(VF.Width)) {
// Select the interleave count.
- IC = CM.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, VF.Cost);
+ IC = LVP.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, VF.Cost);
unsigned SelectedIC = std::max(IC, UserIC);
// Optimistically generate runtime checks if they are needed. Drop them if
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 593868f..62ab3f52 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -991,6 +991,17 @@ class BinOpSameOpcodeHelper {
return Candidate & OrBIT;
case Instruction::Xor:
return Candidate & XorBIT;
+ case Instruction::LShr:
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::FDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ case Instruction::FRem:
+ return false;
default:
break;
}
@@ -1238,6 +1249,12 @@ public:
BinOpSameOpcodeHelper Converter(MainOp);
if (!Converter.add(I) || !Converter.add(MainOp))
return nullptr;
+ if (isAltShuffle() && !Converter.hasCandidateOpcode(MainOp->getOpcode())) {
+ BinOpSameOpcodeHelper AltConverter(AltOp);
+ if (AltConverter.add(I) && AltConverter.add(AltOp) &&
+ AltConverter.hasCandidateOpcode(AltOp->getOpcode()))
+ return AltOp;
+ }
if (Converter.hasAltOp() && !isAltShuffle())
return nullptr;
return Converter.hasAltOp() ? AltOp : MainOp;
@@ -1329,7 +1346,7 @@ public:
           // If the copyable instruction comes after MainOp
           // (non-schedulable, but used in the block) - we cannot vectorize
           // it, as it may generate a use before def.
- (isVectorLikeInstWithConstOps(I) || !MainOp->comesBefore(I)));
+ !MainOp->comesBefore(I));
};
return IsNonSchedulableCopyableElement(V);
@@ -18887,8 +18904,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (!UseIntrinsic) {
VFShape Shape =
VFShape::get(CI->getFunctionType(),
- ElementCount::getFixed(
- static_cast<unsigned>(VecTy->getNumElements())),
+ ElementCount::getFixed(VecTy->getNumElements()),
false /*HasGlobalPred*/);
CF = VFDatabase(*CI).getVectorizedFunction(Shape);
} else {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 25b9616..8052e31 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -951,17 +951,7 @@ VPlan::~VPlan() {
delete BackedgeTakenCount;
}
-void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
- VPTransformState &State) {
- Type *TCTy = TripCountV->getType();
- // Check if the backedge taken count is needed, and if so build it.
- if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
- IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
- auto *TCMO = Builder.CreateSub(TripCountV, ConstantInt::get(TCTy, 1),
- "trip.count.minus.1");
- BackedgeTakenCount->setUnderlyingValue(TCMO);
- }
-
+void VPlan::prepareToExecute(Value *VectorTripCountV, VPTransformState &State) {
if (!VectorTripCount.getUnderlyingValue())
VectorTripCount.setUnderlyingValue(VectorTripCountV);
else
@@ -969,6 +959,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
            "VectorTripCount set earlier must match VectorTripCountV");
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
+ Type *TCTy = VectorTripCountV->getType();
// FIXME: Model VF * UF computation completely in VPlan.
unsigned UF = getUF();
if (VF.getNumUsers()) {
@@ -1047,21 +1038,6 @@ void VPlan::execute(VPTransformState *State) {
if (isa<VPWidenPHIRecipe>(&R))
continue;
- if (auto *WidenPhi = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
- assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
- "recipe generating only scalars should have been replaced");
- auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
- PHINode *Phi = cast<PHINode>(GEP->getPointerOperand());
-
- Phi->setIncomingBlock(1, VectorLatchBB);
-
- // Move the last step to the end of the latch block. This ensures
- // consistent placement of all induction updates.
- Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
- Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator()));
- continue;
- }
-
auto *PhiR = cast<VPSingleDefRecipe>(&R);
// VPInstructions currently model scalar Phis only.
bool NeedsScalar = isa<VPInstruction>(PhiR) ||
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index a5de593..8dfb982 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -991,6 +991,9 @@ public:
// operand). Only generates scalar values (either for the first lane only or
// for all lanes, depending on its uses).
PtrAdd,
+ // Add a vector offset in bytes (second operand) to a scalar base pointer
+ // (first operand).
+ WidePtrAdd,
// Returns a scalar boolean value, which is true if any lane of its
// (boolean) vector operands is true. It produces the reduced value across
// all unrolled iterations. Unrolling will add all copies of its original
@@ -1979,6 +1982,9 @@ public:
/// Update the step value of the recipe.
void setStepValue(VPValue *V) { setOperand(1, V); }
+ VPValue *getVFValue() { return getOperand(2); }
+ const VPValue *getVFValue() const { return getOperand(2); }
+
/// Returns the number of incoming values, also number of incoming blocks.
/// Note that at the moment, VPWidenPointerInductionRecipe only has a single
/// incoming value, its start value.
@@ -2068,9 +2074,6 @@ public:
VPSlotTracker &SlotTracker) const override;
#endif
- VPValue *getVFValue() { return getOperand(2); }
- const VPValue *getVFValue() const { return getOperand(2); }
-
VPValue *getSplatVFValue() {
// If the recipe has been unrolled return the VPValue for the induction
// increment.
@@ -2106,8 +2109,7 @@ public:
}
};
-class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
- public VPUnrollPartAccessor<4> {
+class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe {
bool IsScalarAfterVectorization;
public:
@@ -2136,18 +2138,14 @@ public:
VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
/// Generate vector values for the pointer induction.
- void execute(VPTransformState &State) override;
+ void execute(VPTransformState &State) override {
+ llvm_unreachable("cannot execute this recipe, should be expanded via "
+ "expandVPWidenPointerInduction");
+  }
/// Returns true if only scalar values will be generated.
bool onlyScalarsGenerated(bool IsScalable);
- /// Returns the VPValue representing the value of this induction at
- /// the first unrolled part, if it exists. Returns itself if unrolling did not
- /// take place.
- VPValue *getFirstUnrolledPartOperand() {
- return getUnrollPart(*this) == 0 ? this : getOperand(3);
- }
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -3958,8 +3956,7 @@ public:
}
/// Prepare the plan for execution, setting up the required live-in values.
- void prepareToExecute(Value *TripCount, Value *VectorTripCount,
- VPTransformState &State);
+ void prepareToExecute(Value *VectorTripCount, VPTransformState &State);
/// Generate the IR code for this VPlan.
void execute(VPTransformState *State);
@@ -4133,6 +4130,18 @@ public:
return It->second;
}
+ /// Return a VPValue wrapping i1 true.
+ VPValue *getTrue() {
+ LLVMContext &Ctx = getContext();
+ return getOrAddLiveIn(ConstantInt::getTrue(Ctx));
+ }
+
+ /// Return a VPValue wrapping i1 false.
+ VPValue *getFalse() {
+ LLVMContext &Ctx = getContext();
+ return getOrAddLiveIn(ConstantInt::getFalse(Ctx));
+ }
+
/// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
@@ -4229,7 +4238,10 @@ public:
/// block with multiple predecessors (one for the exit via the latch and one
/// via the other early exit).
bool hasEarlyExit() const {
- return ExitBlocks.size() > 1 ||
+ return count_if(ExitBlocks,
+ [](VPIRBasicBlock *EB) {
+ return EB->getNumPredecessors() != 0;
+ }) > 1 ||
(ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 16072f2..4c3cdda 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -128,6 +128,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
return IntegerType::get(Ctx, 1);
case VPInstruction::Broadcast:
case VPInstruction::PtrAdd:
+ case VPInstruction::WidePtrAdd:
// Return the type based on first operand.
return inferScalarType(R->getOperand(0));
case VPInstruction::BranchOnCond:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanCFG.h b/llvm/lib/Transforms/Vectorize/VPlanCFG.h
index b77aa9d..c79485c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanCFG.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanCFG.h
@@ -231,6 +231,13 @@ vp_post_order_shallow(VPBlockBase *G) {
}
/// Returns an iterator range to traverse the graph starting at \p G in
+/// post order while traversing through region blocks.
+inline iterator_range<po_iterator<VPBlockDeepTraversalWrapper<VPBlockBase *>>>
+vp_post_order_deep(VPBlockBase *G) {
+ return post_order(VPBlockDeepTraversalWrapper<VPBlockBase *>(G));
+}
+
+/// Returns an iterator range to traverse the graph starting at \p G in
/// depth-first order while traversing through region blocks.
inline iterator_range<df_iterator<VPBlockDeepTraversalWrapper<VPBlockBase *>>>
vp_depth_first_deep(VPBlockBase *G) {
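
Illustrative note (not part of the patch): vp_post_order_deep is the traversal removeDeadRecipes switches to further down in this patch. The VPlan block classes are internal to LoopVectorize, so the sketch below shows the same llvm::post_order machinery over an ordinary IR CFG; the helper name dumpBlocksPostOrder is made up. In a DFS post-order, a block is visited only after the blocks reached through its non-back-edge successors, which is why walking blocks in this order and scanning each block bottom-up can erase whole chains of dead recipes in one sweep.

    #include "llvm/ADT/PostOrderIterator.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Support/raw_ostream.h"

    // Print the blocks of F in depth-first post-order, the block ordering that
    // vp_post_order_deep produces for a VPlan (shown here over LLVM IR blocks).
    void dumpBlocksPostOrder(llvm::Function &F) {
      for (llvm::BasicBlock *BB : llvm::post_order(&F.getEntryBlock()))
        llvm::errs() << BB->getName() << "\n";
    }
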
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 6c1f53b..1b91901 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -570,8 +570,7 @@ void VPlanTransforms::prepareForVectorization(
VPBuilder Builder(MiddleVPBB);
VPValue *Cmp;
if (!RequiresScalarEpilogueCheck)
- Cmp = Plan.getOrAddLiveIn(
- ConstantInt::getFalse(IntegerType::getInt1Ty(Plan.getContext())));
+ Cmp = Plan.getFalse();
else if (TailFolded)
Cmp = Plan.getOrAddLiveIn(
ConstantInt::getTrue(IntegerType::getInt1Ty(Plan.getContext())));
@@ -671,13 +670,12 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
Intrinsic::ID RdxIntrinsicId =
RedPhiR->getRecurrenceKind() == RecurKind::FMaxNum ? Intrinsic::maxnum
: Intrinsic::minnum;
- assert((isa<VPWidenIntrinsicRecipe>(MinMaxR) &&
- cast<VPWidenIntrinsicRecipe>(MinMaxR)->getVectorIntrinsicID() ==
- RdxIntrinsicId) ||
- (RepR &&
- cast<IntrinsicInst>(RepR->getUnderlyingInstr())->getIntrinsicID() ==
- RdxIntrinsicId) &&
- "Intrinsic did not match recurrence kind");
+ assert(((isa<VPWidenIntrinsicRecipe>(MinMaxR) &&
+ cast<VPWidenIntrinsicRecipe>(MinMaxR)->getVectorIntrinsicID() ==
+ RdxIntrinsicId) ||
+ (RepR && cast<IntrinsicInst>(RepR->getUnderlyingInstr())
+ ->getIntrinsicID() == RdxIntrinsicId)) &&
+ "Intrinsic did not match recurrence kind");
#endif
if (MinMaxR->getOperand(0) == RedPhiR)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 11b4677..47a8077 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -478,6 +478,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::FirstOrderRecurrenceSplice:
case VPInstruction::LogicalAnd:
case VPInstruction::PtrAdd:
+ case VPInstruction::WidePtrAdd:
case VPInstruction::WideIVStep:
return 2;
case Instruction::Select:
@@ -858,6 +859,12 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *Addend = State.get(getOperand(1), VPLane(0));
return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
}
+ case VPInstruction::WidePtrAdd: {
+ Value *Ptr =
+ State.get(getOperand(0), vputils::isSingleScalar(getOperand(0)));
+ Value *Addend = State.get(getOperand(1));
+ return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
+ }
case VPInstruction::AnyOf: {
Value *Res = State.get(getOperand(0));
for (VPValue *Op : drop_begin(operands()))
@@ -1085,6 +1092,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::Not:
case VPInstruction::PtrAdd:
case VPInstruction::WideIVStep:
+ case VPInstruction::WidePtrAdd:
case VPInstruction::StepVector:
case VPInstruction::ReductionStartVector:
return false;
@@ -1123,6 +1131,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
return true;
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
+ case VPInstruction::WidePtrAdd:
+ return Op == getOperand(0);
case VPInstruction::ComputeAnyOfResult:
case VPInstruction::ComputeFindIVResult:
return Op == getOperand(1);
@@ -1231,6 +1241,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::PtrAdd:
O << "ptradd";
break;
+ case VPInstruction::WidePtrAdd:
+ O << "wide-ptradd";
+ break;
case VPInstruction::AnyOf:
O << "any-of";
break;
@@ -1817,7 +1830,8 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
return Opcode == Instruction::AShr;
case OperationType::GEPOp:
return Opcode == Instruction::GetElementPtr ||
- Opcode == VPInstruction::PtrAdd;
+ Opcode == VPInstruction::PtrAdd ||
+ Opcode == VPInstruction::WidePtrAdd;
case OperationType::FPMathOp:
return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
Opcode == Instruction::FSub || Opcode == Instruction::FNeg ||
@@ -2836,12 +2850,12 @@ static void scalarizeInstruction(const Instruction *Instr,
Instruction *Cloned = Instr->clone();
if (!IsVoidRetTy) {
Cloned->setName(Instr->getName() + ".cloned");
-#if !defined(NDEBUG)
- // Verify that VPlan type inference results agree with the type of the
- // generated values.
- assert(State.TypeAnalysis.inferScalarType(RepRecipe) == Cloned->getType() &&
- "inferred type and type from generated instructions do not match");
-#endif
+ Type *ResultTy = State.TypeAnalysis.inferScalarType(RepRecipe);
+ // The operands of the replicate recipe may have been narrowed, resulting in
+ // a narrower result type. Update the type of the cloned instruction to the
+ // correct type.
+ if (ResultTy != Cloned->getType())
+ Cloned->mutateType(ResultTy);
}
RepRecipe->applyFlags(*Cloned);
@@ -3682,87 +3696,6 @@ bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
(!IsScalable || vputils::onlyFirstLaneUsed(this));
}
-void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
- assert(getInductionDescriptor().getKind() ==
- InductionDescriptor::IK_PtrInduction &&
- "Not a pointer induction according to InductionDescriptor!");
- assert(State.TypeAnalysis.inferScalarType(this)->isPointerTy() &&
- "Unexpected type.");
- assert(!onlyScalarsGenerated(State.VF.isScalable()) &&
- "Recipe should have been replaced");
-
- unsigned CurrentPart = getUnrollPart(*this);
-
- // Build a pointer phi
- Value *ScalarStartValue = getStartValue()->getLiveInIRValue();
- Type *ScStValueType = ScalarStartValue->getType();
-
- BasicBlock *VectorPH =
- State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
- PHINode *NewPointerPhi = nullptr;
- if (CurrentPart == 0) {
- IRBuilder<>::InsertPointGuard Guard(State.Builder);
- if (State.Builder.GetInsertPoint() !=
- State.Builder.GetInsertBlock()->getFirstNonPHIIt())
- State.Builder.SetInsertPoint(
- State.Builder.GetInsertBlock()->getFirstNonPHIIt());
- NewPointerPhi = State.Builder.CreatePHI(ScStValueType, 2, "pointer.phi");
- NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
- NewPointerPhi->setDebugLoc(getDebugLoc());
- } else {
- // The recipe has been unrolled. In that case, fetch the single pointer phi
- // shared among all unrolled parts of the recipe.
- auto *GEP =
- cast<GetElementPtrInst>(State.get(getFirstUnrolledPartOperand()));
- NewPointerPhi = cast<PHINode>(GEP->getPointerOperand());
- }
-
- // A pointer induction, performed by using a gep
- BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
- Value *ScalarStepValue = State.get(getStepValue(), VPLane(0));
- Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue());
- Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
- // Add induction update using an incorrect block temporarily. The phi node
- // will be fixed after VPlan execution. Note that at this point the latch
- // block cannot be used, as it does not exist yet.
- // TODO: Model increment value in VPlan, by turning the recipe into a
- // multi-def and a subclass of VPHeaderPHIRecipe.
- if (CurrentPart == 0) {
- // The recipe represents the first part of the pointer induction. Create the
- // GEP to increment the phi across all unrolled parts.
- Value *NumUnrolledElems = State.get(getOperand(2), true);
-
- Value *InductionGEP = GetElementPtrInst::Create(
- State.Builder.getInt8Ty(), NewPointerPhi,
- State.Builder.CreateMul(
- ScalarStepValue,
- State.Builder.CreateTrunc(NumUnrolledElems, PhiType)),
- "ptr.ind", InductionLoc);
-
- NewPointerPhi->addIncoming(InductionGEP, VectorPH);
- }
-
- // Create actual address geps that use the pointer phi as base and a
- // vectorized version of the step value (<step*0, ..., step*N>) as offset.
- Type *VecPhiType = VectorType::get(PhiType, State.VF);
- Value *StartOffsetScalar = State.Builder.CreateMul(
- RuntimeVF, ConstantInt::get(PhiType, CurrentPart));
- Value *StartOffset =
- State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
- // Create a vector of consecutive numbers from zero to VF.
- StartOffset = State.Builder.CreateAdd(
- StartOffset, State.Builder.CreateStepVector(VecPhiType));
-
- assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) &&
- "scalar step must be the same across all parts");
- Value *GEP = State.Builder.CreateGEP(
- State.Builder.getInt8Ty(), NewPointerPhi,
- State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat(
- State.VF, ScalarStepValue)),
- "vector.gep");
- State.set(this, GEP);
-}
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
@@ -3921,11 +3854,6 @@ void VPWidenPHIRecipe::execute(VPTransformState &State) {
Value *Op0 = State.get(getOperand(0));
Type *VecTy = Op0->getType();
Instruction *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name);
- // Manually move it with the other PHIs in case PHI recipes above this one
- // also inserted non-phi instructions.
- // TODO: Remove once VPWidenPointerInductionRecipe is also expanded in
- // convertToConcreteRecipes.
- VecPhi->moveBefore(State.Builder.GetInsertBlock()->getFirstNonPHIIt());
State.set(this, VecPhi);
}
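
Illustrative note (not part of the patch): the new VPInstruction::WidePtrAdd lowers through Builder.CreatePtrAdd with a scalar (or single-scalar) base and a vector addend. The standalone sketch below uses only the public IRBuilder API, a fixed VF of 4, and illustrative names; it produces the same IR shape as the "vector.gep" in the pointer-induction expansion: an i8 GEP with a scalar pointer base and a vector of byte offsets, which yields a vector of pointers.

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("wide-ptradd-sketch", Ctx);
      Type *I64 = Type::getInt64Ty(Ctx);
      Type *PtrTy = PointerType::getUnqual(Ctx);
      auto *FnTy = FunctionType::get(Type::getVoidTy(Ctx), {PtrTy, I64},
                                     /*isVarArg=*/false);
      Function *F = Function::Create(FnTy, Function::ExternalLinkage, "sketch", M);
      BasicBlock *BB = BasicBlock::Create(Ctx, "vector.body", F);
      IRBuilder<> B(BB);

      Value *Base = F->getArg(0); // stands in for the scalar pointer phi
      Value *Step = F->getArg(1); // pointer-induction step, in bytes
      ElementCount VF = ElementCount::getFixed(4);

      // Per-lane byte offsets: <0, 1, 2, 3> * step.
      Value *Offsets = B.CreateMul(B.CreateStepVector(VectorType::get(I64, VF)),
                                   B.CreateVectorSplat(VF, Step));
      // Scalar base + vector of offsets -> <4 x ptr>, like wide-ptradd.
      B.CreatePtrAdd(Base, Offsets, "vector.gep");
      B.CreateRetVoid();
      M.print(outs(), nullptr);
      return 0;
    }
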
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index fcbc86f..a7965a0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -545,10 +545,8 @@ static bool isDeadRecipe(VPRecipeBase &R) {
}
void VPlanTransforms::removeDeadRecipes(VPlan &Plan) {
- ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
- Plan.getEntry());
-
- for (VPBasicBlock *VPBB : reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) {
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_post_order_deep(Plan.getEntry()))) {
// The recipes in the block are processed in reverse order, to catch chains
// of dead recipes.
for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
@@ -846,8 +844,8 @@ optimizeLatchExitInductionUser(VPlan &Plan, VPTypeAnalysis &TypeInfo,
if (ScalarTy->isIntegerTy())
return B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
if (ScalarTy->isPointerTy()) {
- auto *Zero = Plan.getOrAddLiveIn(
- ConstantInt::get(Step->getLiveInIRValue()->getType(), 0));
+ Type *StepTy = TypeInfo.inferScalarType(Step);
+ auto *Zero = Plan.getOrAddLiveIn(ConstantInt::get(StepTy, 0));
return B.createPtrAdd(EndValue,
B.createNaryOp(Instruction::Sub, {Zero, Step}), {},
"ind.escape");
@@ -965,6 +963,7 @@ static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode,
RFlags.getGEPNoWrapFlags());
}
case VPInstruction::PtrAdd:
+ case VPInstruction::WidePtrAdd:
return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0],
Ops[1],
cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
@@ -1431,15 +1430,15 @@ static bool isConditionTrueViaVFAndUF(VPValue *Cond, VPlan &Plan,
// count is not conveniently available as SCEV so far, so we compare directly
// against the original trip count. This is stricter than necessary, as we
// will only return true if the trip count == vector trip count.
- // TODO: Use SCEV for vector trip count once available, to cover cases where
- // vector trip count == UF * VF, but original trip count != UF * VF.
- const SCEV *TripCount =
- vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
- assert(!isa<SCEVCouldNotCompute>(TripCount) &&
+ const SCEV *VectorTripCount =
+ vputils::getSCEVExprForVPValue(&Plan.getVectorTripCount(), SE);
+ if (isa<SCEVCouldNotCompute>(VectorTripCount))
+ VectorTripCount = vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
+ assert(!isa<SCEVCouldNotCompute>(VectorTripCount) &&
"Trip count SCEV must be computable");
ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF);
- const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements);
- return SE.isKnownPredicate(CmpInst::ICMP_EQ, TripCount, C);
+ const SCEV *C = SE.getElementCount(VectorTripCount->getType(), NumElements);
+ return SE.isKnownPredicate(CmpInst::ICMP_EQ, VectorTripCount, C);
}
/// Try to simplify the branch condition of \p Plan. This may restrict the
@@ -1504,10 +1503,8 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
} else {
// The vector region contains header phis for which we cannot remove the
// loop region yet.
- LLVMContext &Ctx = SE.getContext();
- auto *BOC = new VPInstruction(
- VPInstruction::BranchOnCond,
- {Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx))}, Term->getDebugLoc());
+ auto *BOC = new VPInstruction(VPInstruction::BranchOnCond, {Plan.getTrue()},
+ Term->getDebugLoc());
ExitingVPBB->appendRecipe(BOC);
}
@@ -2173,7 +2170,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
Type *CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
VPTypeAnalysis TypeInfo(CanonicalIVType);
LLVMContext &Ctx = CanonicalIVType->getContext();
- VPValue *AllOneMask = Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx));
+ VPValue *AllOneMask = Plan.getTrue();
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
@@ -2754,6 +2751,70 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR,
WidenIVR->replaceAllUsesWith(WidePHI);
}
+/// Expand a VPWidenPointerInductionRecipe into executable recipes, for the
+/// initial value, phi and backedge value. In the following example:
+///
+/// <x1> vector loop: {
+/// vector.body:
+/// EMIT ir<%ptr.iv> = WIDEN-POINTER-INDUCTION %start, %step, %vf
+/// ...
+/// EMIT branch-on-count ...
+/// }
+///
+/// WIDEN-POINTER-INDUCTION will get expanded to:
+///
+/// <x1> vector loop: {
+/// vector.body:
+/// EMIT-SCALAR %pointer.phi = phi %start, %ptr.ind
+/// EMIT %mul = mul %stepvector, %step
+/// EMIT %vector.gep = wide-ptradd %pointer.phi, %mul
+/// ...
+/// EMIT %ptr.ind = ptradd %pointer.phi, %vf
+/// EMIT branch-on-count ...
+/// }
+static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R,
+ VPTypeAnalysis &TypeInfo) {
+ VPlan *Plan = R->getParent()->getPlan();
+ VPValue *Start = R->getStartValue();
+ VPValue *Step = R->getStepValue();
+ VPValue *VF = R->getVFValue();
+
+ assert(R->getInductionDescriptor().getKind() ==
+ InductionDescriptor::IK_PtrInduction &&
+ "Not a pointer induction according to InductionDescriptor!");
+ assert(TypeInfo.inferScalarType(R)->isPointerTy() && "Unexpected type.");
+ assert(!R->onlyScalarsGenerated(Plan->hasScalableVF()) &&
+ "Recipe should have been replaced");
+
+ VPBuilder Builder(R);
+ DebugLoc DL = R->getDebugLoc();
+
+ // Build a scalar pointer phi.
+ VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start, DL, "pointer.phi");
+
+ // Create actual address geps that use the pointer phi as base and a
+ // vectorized version of the step value (<step*0, ..., step*N>) as offset.
+ Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
+ Type *StepTy = TypeInfo.inferScalarType(Step);
+ VPValue *Offset = Builder.createNaryOp(VPInstruction::StepVector, {}, StepTy);
+ Offset = Builder.createNaryOp(Instruction::Mul, {Offset, Step});
+ VPValue *PtrAdd = Builder.createNaryOp(
+ VPInstruction::WidePtrAdd, {ScalarPtrPhi, Offset}, DL, "vector.gep");
+ R->replaceAllUsesWith(PtrAdd);
+
+ // Create the backedge value for the scalar pointer phi.
+ Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
+ VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),
+ DL);
+ VPValue *Inc = Builder.createNaryOp(Instruction::Mul, {Step, VF});
+
+ VPBasicBlock *ExitingBB = Plan->getVectorLoopRegion()->getExitingBasicBlock();
+ Builder.setInsertPoint(ExitingBB, ExitingBB->getTerminator()->getIterator());
+ VPValue *InductionGEP =
+ Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");
+ ScalarPtrPhi->addOperand(InductionGEP);
+}
+
void VPlanTransforms::dissolveLoopRegions(VPlan &Plan) {
   // Replace loop regions with explicit CFG.
SmallVector<VPRegionBlock *> LoopRegions;
@@ -2779,6 +2840,12 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
continue;
}
+ if (auto *WidenIVR = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
+ expandVPWidenPointerInduction(WidenIVR, TypeInfo);
+ ToRemove.push_back(WidenIVR);
+ continue;
+ }
+
// Expand VPBlendRecipe into VPInstruction::Select.
VPBuilder Builder(&R);
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
@@ -3178,6 +3245,21 @@ void VPlanTransforms::materializeVectorTripCount(
Plan.getVectorTripCount().setUnderlyingValue(NewC->getValue());
}
+void VPlanTransforms::materializeBackedgeTakenCount(VPlan &Plan,
+ VPBasicBlock *VectorPH) {
+ VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
+ if (BTC->getNumUsers() == 0)
+ return;
+
+ VPBuilder Builder(VectorPH, VectorPH->begin());
+ auto *TCTy = VPTypeAnalysis(Plan).inferScalarType(Plan.getTripCount());
+ auto *TCMO = Builder.createNaryOp(
+ Instruction::Sub,
+ {Plan.getTripCount(), Plan.getOrAddLiveIn(ConstantInt::get(TCTy, 1))},
+ DebugLoc::getCompilerGenerated(), "trip.count.minus.1");
+ BTC->replaceAllUsesWith(TCMO);
+}
+
/// Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be
/// converted to a narrower recipe. \p V is used by a wide recipe that feeds a
/// store interleave group at index \p Idx, \p WideMember0 is the recipe feeding
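
Illustrative note (not part of the patch): materializeBackedgeTakenCount above models the backedge-taken count as a VPInstruction in the vector preheader, rather than emitting it with an IRBuilder during VPlan::prepareToExecute (see the VPlan.cpp hunk earlier in this patch). The scalar IR it ultimately lowers to is unchanged; a minimal sketch with an illustrative helper name:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Backedge-taken count = trip count - 1, placed in the vector preheader.
    Value *emitBackedgeTakenCount(IRBuilder<> &B, Value *TripCount) {
      Type *TCTy = TripCount->getType();
      return B.CreateSub(TripCount, ConstantInt::get(TCTy, 1),
                         "trip.count.minus.1");
    }
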
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 880159f..5943684 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -256,6 +256,11 @@ struct VPlanTransforms {
unsigned BestUF,
PredicatedScalarEvolution &PSE);
+ /// Materialize the backedge-taken count to be computed explicitly using
+ /// VPInstructions.
+ static void materializeBackedgeTakenCount(VPlan &Plan,
+ VPBasicBlock *VectorPH);
+
/// Try to convert a plan with interleave groups with VF elements to a plan
/// with the interleave groups replaced by wide loads and stores processing VF
/// elements, if all transformed interleave groups access the full vector
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 871e37e..fc072de 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -65,7 +65,7 @@ class UnrollState {
/// Unroll a widen induction recipe \p IV. This introduces recipes to compute
/// the induction steps for each part.
- void unrollWidenInductionByUF(VPWidenIntOrFpInductionRecipe *IV,
+ void unrollWidenInductionByUF(VPWidenInductionRecipe *IV,
VPBasicBlock::iterator InsertPtForPhi);
VPValue *getConstantVPV(unsigned Part) {
@@ -148,7 +148,7 @@ void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
}
void UnrollState::unrollWidenInductionByUF(
- VPWidenIntOrFpInductionRecipe *IV, VPBasicBlock::iterator InsertPtForPhi) {
+ VPWidenInductionRecipe *IV, VPBasicBlock::iterator InsertPtForPhi) {
VPBasicBlock *PH = cast<VPBasicBlock>(
IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
Type *IVTy = TypeInfo.inferScalarType(IV);
@@ -159,9 +159,11 @@ void UnrollState::unrollWidenInductionByUF(
VPValue *ScalarStep = IV->getStepValue();
VPBuilder Builder(PH);
+ Type *VectorStepTy =
+ IVTy->isPointerTy() ? TypeInfo.inferScalarType(ScalarStep) : IVTy;
VPInstruction *VectorStep = Builder.createNaryOp(
- VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, IVTy, Flags,
- IV->getDebugLoc());
+ VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, VectorStepTy,
+ Flags, IV->getDebugLoc());
ToSkip.insert(VectorStep);
@@ -169,8 +171,8 @@ void UnrollState::unrollWidenInductionByUF(
// remains the header phi. Parts > 0 are computed by adding Step to the
// previous part. The header phi recipe will get 2 new operands: the step
// value for a single part and the last part, used to compute the backedge
- // value during VPWidenIntOrFpInductionRecipe::execute. %Part.0 =
- // VPWidenIntOrFpInductionRecipe %Start, %ScalarStep, %VectorStep, %Part.3
+ // value during VPWidenInductionRecipe::execute.
+ // %Part.0 = VPWidenInductionRecipe %Start, %ScalarStep, %VectorStep, %Part.3
// %Part.1 = %Part.0 + %VectorStep
// %Part.2 = %Part.1 + %VectorStep
// %Part.3 = %Part.2 + %VectorStep
@@ -179,8 +181,13 @@ void UnrollState::unrollWidenInductionByUF(
// again.
VPValue *Prev = IV;
Builder.setInsertPoint(IV->getParent(), InsertPtForPhi);
- unsigned AddOpc =
- IVTy->isFloatingPointTy() ? ID.getInductionOpcode() : Instruction::Add;
+ unsigned AddOpc;
+ if (IVTy->isPointerTy())
+ AddOpc = VPInstruction::WidePtrAdd;
+ else if (IVTy->isFloatingPointTy())
+ AddOpc = ID.getInductionOpcode();
+ else
+ AddOpc = Instruction::Add;
for (unsigned Part = 1; Part != UF; ++Part) {
std::string Name =
Part > 1 ? "step.add." + std::to_string(Part) : "step.add";
@@ -207,7 +214,7 @@ void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
return;
// Generate step vectors for each unrolled part.
- if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(R)) {
+ if (auto *IV = dyn_cast<VPWidenInductionRecipe>(R)) {
unrollWidenInductionByUF(IV, InsertPtForPhi);
return;
}
@@ -221,10 +228,7 @@ void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
VPRecipeBase *Copy = R->clone();
Copy->insertBefore(*R->getParent(), InsertPt);
addRecipeForPart(R, Copy, Part);
- if (isa<VPWidenPointerInductionRecipe>(R)) {
- Copy->addOperand(R);
- Copy->addOperand(getConstantVPV(Part));
- } else if (RdxPhi) {
+ if (RdxPhi) {
// If the start value is a ReductionStartVector, use the identity value
// (second operand) for unrolled parts. If the scaling factor is > 1,
// create a new ReductionStartVector with the scale factor and both
@@ -450,8 +454,7 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
Unroller.remapOperand(&H, 1, UF - 1);
continue;
}
- if (Unroller.contains(H.getVPSingleValue()) ||
- isa<VPWidenPointerInductionRecipe>(&H)) {
+ if (Unroller.contains(H.getVPSingleValue())) {
Part = 1;
continue;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 81bd21b..14f20c6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -73,8 +73,11 @@ bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) {
}
const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
- if (V->isLiveIn())
- return SE.getSCEV(V->getLiveInIRValue());
+ if (V->isLiveIn()) {
+ if (Value *LiveIn = V->getLiveInIRValue())
+ return SE.getSCEV(LiveIn);
+ return SE.getCouldNotCompute();
+ }
// TODO: Support constructing SCEVs for more recipes as needed.
return TypeSwitch<const VPRecipeBase *, const SCEV *>(V->getDefiningRecipe())