author    NAKAMURA Takumi <geek4civic@gmail.com>  2025-01-09 17:16:04 +0900
committer NAKAMURA Takumi <geek4civic@gmail.com>  2025-01-09 17:16:04 +0900
commit    0aa930a41f2d1ebf1fa90ec42da8f96d15a4dcbb (patch)
tree      6a77b463f700e090df586672c26b9fe765fd115b /llvm/lib/Target
parent    ec6892d1c979ce0b84c86918d5cdbb03037b409a (diff)
parent    6d16b1c5c468a79ecf867293023c89ac518ecdda (diff)
Merge branch 'users/chapuni/cov/single/pair' into users/chapuni/cov/single/nextcount-base
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 48
-rw-r--r--  llvm/lib/Target/AArch64/AArch64Combine.td | 11
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FMV.td | 105
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 177
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.h | 1
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrFormats.td | 7
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.td | 32
-rw-r--r--  llvm/lib/Target/AArch64/AArch64PointerAuth.cpp | 12
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td | 32
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 8
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td | 54
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SystemOperands.td | 527
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 89
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h | 4
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 41
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h | 1
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp | 13
-rw-r--r--  llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp | 29
-rw-r--r--  llvm/lib/Target/AArch64/SVEInstrFormats.td | 102
-rw-r--r--  llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 47
-rw-r--r--  llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h | 110
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 14
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp | 71
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h | 13
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 10
-rw-r--r--  llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 8
-rw-r--r--  llvm/lib/Target/AMDGPU/BUFInstructions.td | 8
-rw-r--r--  llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 25
-rw-r--r--  llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/MIMGInstructions.td | 66
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 15
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 114
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.h | 4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 44
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td | 21
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 47
-rw-r--r--  llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp | 5
-rw-r--r--  llvm/lib/Target/AMDGPU/SMInstructions.td | 84
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP1Instructions.td | 20
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP3Instructions.td | 20
-rw-r--r--  llvm/lib/Target/AMDGPU/VOPCInstructions.td | 132
-rw-r--r--  llvm/lib/Target/AMDGPU/VOPInstructions.td | 4
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp | 2
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrInfo.td | 3
-rw-r--r--  llvm/lib/Target/ARM/ARMSystemRegister.td | 47
-rw-r--r--  llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp | 2
-rw-r--r--  llvm/lib/Target/ARM/Utils/ARMBaseInfo.cpp | 4
-rw-r--r--  llvm/lib/Target/ARM/Utils/ARMBaseInfo.h | 8
-rw-r--r--  llvm/lib/Target/AVR/AVRDevices.td | 50
-rw-r--r--  llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp | 69
-rw-r--r--  llvm/lib/Target/DirectX/DXIL.td | 19
-rw-r--r--  llvm/lib/Target/DirectX/DXILOpBuilder.cpp | 4
-rw-r--r--  llvm/lib/Target/DirectX/DXILOpLowering.cpp | 60
-rw-r--r--  llvm/lib/Target/DirectX/DXILResourceAccess.cpp | 13
-rw-r--r--  llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp | 8
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp | 2
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp | 39
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 12
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp | 21
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp | 9
-rw-r--r--  llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h | 3
-rw-r--r--  llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp | 2
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp | 18
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 12
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 3
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 610
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 342
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp | 19
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 7
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 5
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXUtilities.cpp | 6
-rw-r--r--  llvm/lib/Target/NVPTX/NVVMReflect.cpp | 9
-rw-r--r--  llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 16
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 11
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 4
-rw-r--r--  llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 37
-rw-r--r--  llvm/lib/Target/RISCV/CMakeLists.txt | 1
-rw-r--r--  llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp | 14
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp | 35
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 28
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 5
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 10
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCV.td | 6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVCombine.td | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFeatures.td | 28
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 137
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 5
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoD.td | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td | 32
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td | 107
-rw-r--r--  llvm/lib/Target/RISCV/RISCVPfmCounters.td | 18
-rw-r--r--  llvm/lib/Target/RISCV/RISCVProcessors.td | 19
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedule.td | 1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSystemOperands.td | 46
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 24
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h | 3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 569
-rw-r--r--  llvm/lib/Target/SPIRV/CMakeLists.txt | 1
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp | 13
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp | 2
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp | 136
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h | 16
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 8
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp | 1
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h | 17
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp | 13
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVInstrInfo.h | 1
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 44
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 372
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h | 38
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp | 5
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVUtils.h | 5
-rw-r--r--  llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp | 7
-rw-r--r--  llvm/lib/Target/TargetMachine.cpp | 2
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp | 12
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp | 19
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp | 12
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp | 12
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 143
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h | 4
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX10.td | 111
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td | 246
-rw-r--r--  llvm/lib/Target/X86/X86InstrFMA3Info.cpp | 15
-rw-r--r--  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 7
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.cpp | 294
-rw-r--r--  llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp | 4
-rw-r--r--  llvm/lib/Target/X86/X86SchedSapphireRapids.td | 52
-rw-r--r--  llvm/lib/Target/X86/X86ScheduleZnver4.td | 4
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 24
137 files changed, 4078 insertions, 2324 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 69d07f2..9bec782 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -2730,6 +2730,54 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInstSB);
return;
}
+ case AArch64::TLSDESC_AUTH_CALLSEQ: {
+ /// lower this to:
+ /// adrp x0, :tlsdesc_auth:var
+ /// ldr x16, [x0, #:tlsdesc_auth_lo12:var]
+ /// add x0, x0, #:tlsdesc_auth_lo12:var
+ /// blraa x16, x0
+ /// (TPIDR_EL0 offset now in x0)
+ const MachineOperand &MO_Sym = MI->getOperand(0);
+ MachineOperand MO_TLSDESC_LO12(MO_Sym), MO_TLSDESC(MO_Sym);
+ MCOperand SymTLSDescLo12, SymTLSDesc;
+ MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
+ MO_TLSDESC.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGE);
+ MCInstLowering.lowerOperand(MO_TLSDESC_LO12, SymTLSDescLo12);
+ MCInstLowering.lowerOperand(MO_TLSDESC, SymTLSDesc);
+
+ MCInst Adrp;
+ Adrp.setOpcode(AArch64::ADRP);
+ Adrp.addOperand(MCOperand::createReg(AArch64::X0));
+ Adrp.addOperand(SymTLSDesc);
+ EmitToStreamer(*OutStreamer, Adrp);
+
+ MCInst Ldr;
+ Ldr.setOpcode(AArch64::LDRXui);
+ Ldr.addOperand(MCOperand::createReg(AArch64::X16));
+ Ldr.addOperand(MCOperand::createReg(AArch64::X0));
+ Ldr.addOperand(SymTLSDescLo12);
+ Ldr.addOperand(MCOperand::createImm(0));
+ EmitToStreamer(*OutStreamer, Ldr);
+
+ MCInst Add;
+ Add.setOpcode(AArch64::ADDXri);
+ Add.addOperand(MCOperand::createReg(AArch64::X0));
+ Add.addOperand(MCOperand::createReg(AArch64::X0));
+ Add.addOperand(SymTLSDescLo12);
+ Add.addOperand(MCOperand::createImm(AArch64_AM::getShiftValue(0)));
+ EmitToStreamer(*OutStreamer, Add);
+
+ // Authenticated TLSDESC accesses are not relaxed.
+ // Thus, do not emit .tlsdesccall for AUTH TLSDESC.
+
+ MCInst Blraa;
+ Blraa.setOpcode(AArch64::BLRAA);
+ Blraa.addOperand(MCOperand::createReg(AArch64::X16));
+ Blraa.addOperand(MCOperand::createReg(AArch64::X0));
+ EmitToStreamer(*OutStreamer, Blraa);
+
+ return;
+ }
case AArch64::TLSDESC_CALLSEQ: {
/// lower this to:
/// adrp x0, :tlsdesc:var
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 1b1d81f..ce19806 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -131,6 +131,15 @@ def ext: GICombineRule <
(apply [{ applyEXT(*${root}, ${matchinfo}); }])
>;
+def fullrev: GICombineRule <
+ (defs root:$root, shuffle_matchdata:$matchinfo),
+ (match (G_IMPLICIT_DEF $src2),
+ (G_SHUFFLE_VECTOR $src, $src1, $src2, $mask):$root,
+ [{ return ShuffleVectorInst::isReverseMask(${mask}.getShuffleMask(),
+ ${mask}.getShuffleMask().size()); }]),
+ (apply [{ applyFullRev(*${root}, MRI); }])
+>;
+
def insertelt_nonconst: GICombineRule <
(defs root:$root, shuffle_matchdata:$matchinfo),
(match (wip_match_opcode G_INSERT_VECTOR_ELT):$root,
@@ -163,7 +172,7 @@ def form_duplane : GICombineRule <
(apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
>;
-def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn,
+def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, fullrev,
form_duplane, shuf_to_ins]>;
// Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's
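Note on the new fullrev combine: it only fires when the shuffle's second source is G_IMPLICIT_DEF and the mask reverses the single live input. A minimal standalone sketch of the mask test it delegates to ShuffleVectorInst::isReverseMask (illustration, not the LLVM implementation):

```cpp
#include <vector>

// A mask is a "full reverse" when lane i selects element N-1-i of the
// input; -1 marks an undef lane and is allowed anywhere.
// e.g. <3,2,1,0> reverses a 4-element vector.
bool isFullReverseMask(const std::vector<int> &Mask) {
  const int N = static_cast<int>(Mask.size());
  for (int I = 0; I != N; ++I)
    if (Mask[I] != -1 && Mask[I] != N - 1 - I)
      return false;
  return true;
}
```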
diff --git a/llvm/lib/Target/AArch64/AArch64FMV.td b/llvm/lib/Target/AArch64/AArch64FMV.td
index fc7a94a..e0f56fd 100644
--- a/llvm/lib/Target/AArch64/AArch64FMV.td
+++ b/llvm/lib/Target/AArch64/AArch64FMV.td
@@ -22,64 +22,65 @@
// Something you can add to target_version or target_clones.
-class FMVExtension<string n, string b, int p> {
+class FMVExtension<string name, string enumeration> {
// Name, as spelled in target_version or target_clones. e.g. "memtag".
- string Name = n;
+ string Name = name;
// A C++ expression giving the number of the bit in the FMV ABI.
// Currently this is given as a value from the enum "CPUFeatures".
- string Bit = b;
+ string FeatureBit = "FEAT_" # enumeration;
// SubtargetFeature enabled for codegen when this FMV feature is present.
- string BackendFeature = n;
+ string BackendFeature = name;
- // The FMV priority.
- int Priority = p;
+ // A C++ expression giving the number of the priority bit.
+ // Currently this is given as a value from the enum "FeatPriorities".
+ string PriorityBit = "PRIOR_" # enumeration;
}
-def : FMVExtension<"aes", "FEAT_PMULL", 150>;
-def : FMVExtension<"bf16", "FEAT_BF16", 280>;
-def : FMVExtension<"bti", "FEAT_BTI", 510>;
-def : FMVExtension<"crc", "FEAT_CRC", 110>;
-def : FMVExtension<"dit", "FEAT_DIT", 180>;
-def : FMVExtension<"dotprod", "FEAT_DOTPROD", 104>;
-let BackendFeature = "ccpp" in def : FMVExtension<"dpb", "FEAT_DPB", 190>;
-let BackendFeature = "ccdp" in def : FMVExtension<"dpb2", "FEAT_DPB2", 200>;
-def : FMVExtension<"f32mm", "FEAT_SVE_F32MM", 350>;
-def : FMVExtension<"f64mm", "FEAT_SVE_F64MM", 360>;
-def : FMVExtension<"fcma", "FEAT_FCMA", 220>;
-def : FMVExtension<"flagm", "FEAT_FLAGM", 20>;
-let BackendFeature = "altnzcv" in def : FMVExtension<"flagm2", "FEAT_FLAGM2", 30>;
-def : FMVExtension<"fp", "FEAT_FP", 90>;
-def : FMVExtension<"fp16", "FEAT_FP16", 170>;
-def : FMVExtension<"fp16fml", "FEAT_FP16FML", 175>;
-let BackendFeature = "fptoint" in def : FMVExtension<"frintts", "FEAT_FRINTTS", 250>;
-def : FMVExtension<"i8mm", "FEAT_I8MM", 270>;
-def : FMVExtension<"jscvt", "FEAT_JSCVT", 210>;
-def : FMVExtension<"ls64", "FEAT_LS64_ACCDATA", 520>;
-def : FMVExtension<"lse", "FEAT_LSE", 80>;
-def : FMVExtension<"memtag", "FEAT_MEMTAG2", 440>;
-def : FMVExtension<"mops", "FEAT_MOPS", 650>;
-def : FMVExtension<"predres", "FEAT_PREDRES", 480>;
-def : FMVExtension<"rcpc", "FEAT_RCPC", 230>;
-let BackendFeature = "rcpc-immo" in def : FMVExtension<"rcpc2", "FEAT_RCPC2", 240>;
-def : FMVExtension<"rcpc3", "FEAT_RCPC3", 241>;
-def : FMVExtension<"rdm", "FEAT_RDM", 108>;
-def : FMVExtension<"rng", "FEAT_RNG", 10>;
-def : FMVExtension<"sb", "FEAT_SB", 470>;
-def : FMVExtension<"sha2", "FEAT_SHA2", 130>;
-def : FMVExtension<"sha3", "FEAT_SHA3", 140>;
-def : FMVExtension<"simd", "FEAT_SIMD", 100>;
-def : FMVExtension<"sm4", "FEAT_SM4", 106>;
-def : FMVExtension<"sme", "FEAT_SME", 430>;
-def : FMVExtension<"sme-f64f64", "FEAT_SME_F64", 560>;
-def : FMVExtension<"sme-i16i64", "FEAT_SME_I64", 570>;
-def : FMVExtension<"sme2", "FEAT_SME2", 580>;
-def : FMVExtension<"ssbs", "FEAT_SSBS2", 490>;
-def : FMVExtension<"sve", "FEAT_SVE", 310>;
-def : FMVExtension<"sve2", "FEAT_SVE2", 370>;
-def : FMVExtension<"sve2-aes", "FEAT_SVE_PMULL128", 380>;
-def : FMVExtension<"sve2-bitperm", "FEAT_SVE_BITPERM", 400>;
-def : FMVExtension<"sve2-sha3", "FEAT_SVE_SHA3", 410>;
-def : FMVExtension<"sve2-sm4", "FEAT_SVE_SM4", 420>;
-def : FMVExtension<"wfxt", "FEAT_WFXT", 550>;
+def : FMVExtension<"aes", "PMULL">;
+def : FMVExtension<"bf16", "BF16">;
+def : FMVExtension<"bti", "BTI">;
+def : FMVExtension<"crc", "CRC">;
+def : FMVExtension<"dit", "DIT">;
+def : FMVExtension<"dotprod", "DOTPROD">;
+let BackendFeature = "ccpp" in def : FMVExtension<"dpb", "DPB">;
+let BackendFeature = "ccdp" in def : FMVExtension<"dpb2", "DPB2">;
+def : FMVExtension<"f32mm", "SVE_F32MM">;
+def : FMVExtension<"f64mm", "SVE_F64MM">;
+def : FMVExtension<"fcma", "FCMA">;
+def : FMVExtension<"flagm", "FLAGM">;
+let BackendFeature = "altnzcv" in def : FMVExtension<"flagm2", "FLAGM2">;
+def : FMVExtension<"fp", "FP">;
+def : FMVExtension<"fp16", "FP16">;
+def : FMVExtension<"fp16fml", "FP16FML">;
+let BackendFeature = "fptoint" in def : FMVExtension<"frintts", "FRINTTS">;
+def : FMVExtension<"i8mm", "I8MM">;
+def : FMVExtension<"jscvt", "JSCVT">;
+def : FMVExtension<"ls64", "LS64_ACCDATA">;
+def : FMVExtension<"lse", "LSE">;
+def : FMVExtension<"memtag", "MEMTAG2">;
+def : FMVExtension<"mops", "MOPS">;
+def : FMVExtension<"predres", "PREDRES">;
+def : FMVExtension<"rcpc", "RCPC">;
+let BackendFeature = "rcpc-immo" in def : FMVExtension<"rcpc2", "RCPC2">;
+def : FMVExtension<"rcpc3", "RCPC3">;
+def : FMVExtension<"rdm", "RDM">;
+def : FMVExtension<"rng", "RNG">;
+def : FMVExtension<"sb", "SB">;
+def : FMVExtension<"sha2", "SHA2">;
+def : FMVExtension<"sha3", "SHA3">;
+def : FMVExtension<"simd", "SIMD">;
+def : FMVExtension<"sm4", "SM4">;
+def : FMVExtension<"sme", "SME">;
+def : FMVExtension<"sme-f64f64", "SME_F64">;
+def : FMVExtension<"sme-i16i64", "SME_I64">;
+def : FMVExtension<"sme2", "SME2">;
+def : FMVExtension<"ssbs", "SSBS2">;
+def : FMVExtension<"sve", "SVE">;
+def : FMVExtension<"sve2", "SVE2">;
+def : FMVExtension<"sve2-aes", "SVE_PMULL128">;
+def : FMVExtension<"sve2-bitperm", "SVE_BITPERM">;
+def : FMVExtension<"sve2-sha3", "SVE_SHA3">;
+def : FMVExtension<"sve2-sm4", "SVE_SM4">;
+def : FMVExtension<"wfxt", "WFXT">;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ff3ca8a..6aa8cd4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -228,6 +228,8 @@ public:
return false;
}
+ bool SelectAny(SDValue) { return true; }
+
bool SelectDupZero(SDValue N) {
switch(N->getOpcode()) {
case AArch64ISD::DUP:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a6f8f47..3ad2905 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -753,6 +753,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(Op, MVT::v8bf16, Expand);
}
+ // For bf16, fpextend is custom lowered to be optionally expanded into shifts.
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f32, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f32, Custom);
+
auto LegalizeNarrowFP = [this](MVT ScalarVT) {
for (auto Op : {
ISD::SETCC,
@@ -893,10 +901,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(Op, MVT::f16, Legal);
}
- // Strict conversion to a larger type is legal
- for (auto VT : {MVT::f32, MVT::f64})
- setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
-
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
@@ -1183,6 +1187,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setMaxDivRemBitWidthSupported(128);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ if (Subtarget->hasSME())
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
if (Subtarget->isNeonAvailable()) {
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
@@ -2669,6 +2675,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::CSINC)
MAKE_CASE(AArch64ISD::THREAD_POINTER)
MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
+ MAKE_CASE(AArch64ISD::TLSDESC_AUTH_CALLSEQ)
MAKE_CASE(AArch64ISD::PROBED_ALLOCA)
MAKE_CASE(AArch64ISD::ABDS_PRED)
MAKE_CASE(AArch64ISD::ABDU_PRED)
@@ -4495,6 +4502,54 @@ SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
return LowerFixedLengthFPExtendToSVE(Op, DAG);
+ bool IsStrict = Op->isStrictFPOpcode();
+ SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);
+ EVT Op0VT = Op0.getValueType();
+ if (VT == MVT::f64) {
+ // FP16->FP32 extends are legal for v32 and v4f32.
+ if (Op0VT == MVT::f32 || Op0VT == MVT::f16)
+ return Op;
+ // Split bf16->f64 extends into two fpextends.
+ if (Op0VT == MVT::bf16 && IsStrict) {
+ SDValue Ext1 =
+ DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(Op), {MVT::f32, MVT::Other},
+ {Op0, Op.getOperand(0)});
+ return DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(Op), {VT, MVT::Other},
+ {Ext1, Ext1.getValue(1)});
+ }
+ if (Op0VT == MVT::bf16)
+ return DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), VT,
+ DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Op0));
+ return SDValue();
+ }
+
+ if (VT.getScalarType() == MVT::f32) {
+ // FP16->FP32 extends are legal for v32 and v4f32.
+ if (Op0VT.getScalarType() == MVT::f16)
+ return Op;
+ if (Op0VT.getScalarType() == MVT::bf16) {
+ SDLoc DL(Op);
+ EVT IVT = VT.changeTypeToInteger();
+ if (!Op0VT.isVector()) {
+ Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4bf16, Op0);
+ IVT = MVT::v4i32;
+ }
+
+ EVT Op0IVT = Op0.getValueType().changeTypeToInteger();
+ SDValue Ext =
+ DAG.getNode(ISD::ANY_EXTEND, DL, IVT, DAG.getBitcast(Op0IVT, Op0));
+ SDValue Shift =
+ DAG.getNode(ISD::SHL, DL, IVT, Ext, DAG.getConstant(16, DL, IVT));
+ if (!Op0VT.isVector())
+ Shift = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Shift,
+ DAG.getConstant(0, DL, MVT::i64));
+ Shift = DAG.getBitcast(VT, Shift);
+ return IsStrict ? DAG.getMergeValues({Shift, Op.getOperand(0)}, DL)
+ : Shift;
+ }
+ return SDValue();
+ }
+
assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
return SDValue();
}
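The scalar half of this lowering works because bf16 is exactly the top half of an IEEE-754 binary32: widening needs no rounding or arithmetic, only a 16-bit shift of the raw bits, which is what the ANY_EXTEND + SHL + bitcast sequence above produces. A minimal scalar sketch of the same trick (illustrative helper, not LLVM code):

```cpp
#include <cstdint>
#include <cstring>

// bf16 keeps f32's sign bit, all 8 exponent bits, and the top 7 mantissa
// bits, so placing the 16 payload bits in the upper half of a 32-bit word
// and reinterpreting gives a lossless extension.
float bf16BitsToFloat(uint16_t Bits) {
  const uint32_t Shifted = static_cast<uint32_t>(Bits) << 16; // SHL #16
  float F;
  std::memcpy(&F, &Shifted, sizeof F); // the final bitcast
  return F;
}
```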
@@ -7342,6 +7397,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::STRICT_FP_ROUND:
return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND:
+ case ISD::STRICT_FP_EXTEND:
return LowerFP_EXTEND(Op, DAG);
case ISD::FRAMEADDR:
return LowerFRAMEADDR(Op, DAG);
@@ -10123,8 +10179,11 @@ SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain =
- DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
+ unsigned Opcode =
+ DAG.getMachineFunction().getInfo<AArch64FunctionInfo>()->hasELFSignedGOT()
+ ? AArch64ISD::TLSDESC_AUTH_CALLSEQ
+ : AArch64ISD::TLSDESC_CALLSEQ;
+ Chain = DAG.getNode(Opcode, DL, NodeTys, {Chain, SymAddr});
SDValue Glue = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
@@ -10136,8 +10195,12 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ AArch64FunctionInfo *MFI =
+ DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
- TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
+ TLSModel::Model Model = MFI->hasELFSignedGOT()
+ ? TLSModel::GeneralDynamic
+ : getTargetMachine().getTLSModel(GA->getGlobal());
if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
if (Model == TLSModel::LocalDynamic)
@@ -10174,8 +10237,6 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
// calculation.
// These accesses will need deduplicating if there's more than one.
- AArch64FunctionInfo *MFI =
- DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
// The call needs a relocation too for linker relaxation. It doesn't make
@@ -18424,7 +18485,7 @@ static SDValue performUADDVAddCombine(SDValue A, SelectionDAG &DAG) {
EVT VT = A.getValueType();
SDValue Op0 = A.getOperand(0);
SDValue Op1 = A.getOperand(1);
- if (Op0.getOpcode() != Op0.getOpcode() ||
+ if (Op0.getOpcode() != Op1.getOpcode() ||
(Op0.getOpcode() != ISD::ZERO_EXTEND &&
Op0.getOpcode() != ISD::SIGN_EXTEND))
return SDValue();
@@ -21981,21 +22042,35 @@ SDValue tryLowerPartialReductionToDot(SDNode *N,
SDLoc DL(N);
SDValue Op2 = N->getOperand(2);
- if (Op2->getOpcode() != ISD::MUL ||
- !ISD::isExtOpcode(Op2->getOperand(0)->getOpcode()) ||
- !ISD::isExtOpcode(Op2->getOperand(1)->getOpcode()))
- return SDValue();
+ unsigned Op2Opcode = Op2->getOpcode();
+ SDValue MulOpLHS, MulOpRHS;
+ bool MulOpLHSIsSigned, MulOpRHSIsSigned;
+ if (ISD::isExtOpcode(Op2Opcode)) {
+ MulOpLHSIsSigned = MulOpRHSIsSigned = (Op2Opcode == ISD::SIGN_EXTEND);
+ MulOpLHS = Op2->getOperand(0);
+ MulOpRHS = DAG.getConstant(1, DL, MulOpLHS.getValueType());
+ } else if (Op2Opcode == ISD::MUL) {
+ SDValue ExtMulOpLHS = Op2->getOperand(0);
+ SDValue ExtMulOpRHS = Op2->getOperand(1);
+
+ unsigned ExtMulOpLHSOpcode = ExtMulOpLHS->getOpcode();
+ unsigned ExtMulOpRHSOpcode = ExtMulOpRHS->getOpcode();
+ if (!ISD::isExtOpcode(ExtMulOpLHSOpcode) ||
+ !ISD::isExtOpcode(ExtMulOpRHSOpcode))
+ return SDValue();
- SDValue Acc = N->getOperand(1);
- SDValue Mul = N->getOperand(2);
- SDValue ExtMulOpLHS = Mul->getOperand(0);
- SDValue ExtMulOpRHS = Mul->getOperand(1);
+ MulOpLHSIsSigned = ExtMulOpLHSOpcode == ISD::SIGN_EXTEND;
+ MulOpRHSIsSigned = ExtMulOpRHSOpcode == ISD::SIGN_EXTEND;
+
+ MulOpLHS = ExtMulOpLHS->getOperand(0);
+ MulOpRHS = ExtMulOpRHS->getOperand(0);
- SDValue MulOpLHS = ExtMulOpLHS->getOperand(0);
- SDValue MulOpRHS = ExtMulOpRHS->getOperand(0);
- if (MulOpLHS.getValueType() != MulOpRHS.getValueType())
+ if (MulOpLHS.getValueType() != MulOpRHS.getValueType())
+ return SDValue();
+ } else
return SDValue();
+ SDValue Acc = N->getOperand(1);
EVT ReducedVT = N->getValueType(0);
EVT MulSrcVT = MulOpLHS.getValueType();
@@ -22009,8 +22084,6 @@ SDValue tryLowerPartialReductionToDot(SDNode *N,
!(ReducedVT == MVT::v2i32 && MulSrcVT == MVT::v8i8))
return SDValue();
- bool MulOpLHSIsSigned = ExtMulOpLHS->getOpcode() == ISD::SIGN_EXTEND;
- bool MulOpRHSIsSigned = ExtMulOpRHS->getOpcode() == ISD::SIGN_EXTEND;
// If the extensions are mixed, we should lower it to a usdot instead
unsigned Opcode = 0;
if (MulOpLHSIsSigned != MulOpRHSIsSigned) {
@@ -22026,10 +22099,8 @@ SDValue tryLowerPartialReductionToDot(SDNode *N,
// USDOT expects the signed operand to be last
if (!MulOpRHSIsSigned)
std::swap(MulOpLHS, MulOpRHS);
- } else if (MulOpLHSIsSigned)
- Opcode = AArch64ISD::SDOT;
- else
- Opcode = AArch64ISD::UDOT;
+ } else
+ Opcode = MulOpLHSIsSigned ? AArch64ISD::SDOT : AArch64ISD::UDOT;
// Partial reduction lowering for (nx)v16i8 to (nx)v4i64 requires an i32 dot
// product followed by a zero / sign extension
@@ -27413,6 +27484,15 @@ void AArch64TargetLowering::ReplaceNodeResults(
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
+ case Intrinsic::aarch64_sme_in_streaming_mode: {
+ SDLoc DL(N);
+ SDValue Chain = DAG.getEntryNode();
+ SDValue RuntimePStateSM =
+ getRuntimePStateSM(DAG, Chain, DL, N->getValueType(0));
+ Results.push_back(
+ DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, RuntimePStateSM));
+ return;
+ }
case Intrinsic::experimental_vector_match:
case Intrinsic::get_active_lane_mask: {
if (!VT.isFixedLengthVector() || VT.getVectorElementType() != MVT::i1)
@@ -29648,9 +29728,16 @@ bool AArch64TargetLowering::isComplexDeinterleavingOperationSupported(
if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2() && VTy->isScalableTy()) {
unsigned ScalarWidth = ScalarTy->getScalarSizeInBits();
+
+ if (Operation == ComplexDeinterleavingOperation::CDot)
+ return ScalarWidth == 32 || ScalarWidth == 64;
return 8 <= ScalarWidth && ScalarWidth <= 64;
}
+ // CDot is not supported outside of scalable/sve scopes
+ if (Operation == ComplexDeinterleavingOperation::CDot)
+ return false;
+
return (ScalarTy->isHalfTy() && Subtarget->hasFullFP16()) ||
ScalarTy->isFloatTy() || ScalarTy->isDoubleTy();
}
@@ -29660,6 +29747,8 @@ Value *AArch64TargetLowering::createComplexDeinterleavingIR(
ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
Value *Accumulator) const {
VectorType *Ty = cast<VectorType>(InputA->getType());
+ if (Accumulator == nullptr)
+ Accumulator = Constant::getNullValue(Ty);
bool IsScalable = Ty->isScalableTy();
bool IsInt = Ty->getElementType()->isIntegerTy();
@@ -29671,6 +29760,10 @@ Value *AArch64TargetLowering::createComplexDeinterleavingIR(
if (TyWidth > 128) {
int Stride = Ty->getElementCount().getKnownMinValue() / 2;
+ int AccStride = cast<VectorType>(Accumulator->getType())
+ ->getElementCount()
+ .getKnownMinValue() /
+ 2;
auto *HalfTy = VectorType::getHalfElementsVectorType(Ty);
auto *LowerSplitA = B.CreateExtractVector(HalfTy, InputA, B.getInt64(0));
auto *LowerSplitB = B.CreateExtractVector(HalfTy, InputB, B.getInt64(0));
@@ -29680,25 +29773,26 @@ Value *AArch64TargetLowering::createComplexDeinterleavingIR(
B.CreateExtractVector(HalfTy, InputB, B.getInt64(Stride));
Value *LowerSplitAcc = nullptr;
Value *UpperSplitAcc = nullptr;
- if (Accumulator) {
- LowerSplitAcc = B.CreateExtractVector(HalfTy, Accumulator, B.getInt64(0));
- UpperSplitAcc =
- B.CreateExtractVector(HalfTy, Accumulator, B.getInt64(Stride));
- }
+ Type *FullTy = Ty;
+ FullTy = Accumulator->getType();
+ auto *HalfAccTy = VectorType::getHalfElementsVectorType(
+ cast<VectorType>(Accumulator->getType()));
+ LowerSplitAcc =
+ B.CreateExtractVector(HalfAccTy, Accumulator, B.getInt64(0));
+ UpperSplitAcc =
+ B.CreateExtractVector(HalfAccTy, Accumulator, B.getInt64(AccStride));
auto *LowerSplitInt = createComplexDeinterleavingIR(
B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
auto *UpperSplitInt = createComplexDeinterleavingIR(
B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
- auto *Result = B.CreateInsertVector(Ty, PoisonValue::get(Ty), LowerSplitInt,
- B.getInt64(0));
- return B.CreateInsertVector(Ty, Result, UpperSplitInt, B.getInt64(Stride));
+ auto *Result = B.CreateInsertVector(FullTy, PoisonValue::get(FullTy),
+ LowerSplitInt, B.getInt64(0));
+ return B.CreateInsertVector(FullTy, Result, UpperSplitInt,
+ B.getInt64(AccStride));
}
if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {
- if (Accumulator == nullptr)
- Accumulator = Constant::getNullValue(Ty);
-
if (IsScalable) {
if (IsInt)
return B.CreateIntrinsic(
@@ -29750,6 +29844,13 @@ Value *AArch64TargetLowering::createComplexDeinterleavingIR(
return B.CreateIntrinsic(IntId, Ty, {InputA, InputB});
}
+ if (OperationType == ComplexDeinterleavingOperation::CDot && IsInt &&
+ IsScalable) {
+ return B.CreateIntrinsic(
+ Intrinsic::aarch64_sve_cdot, Accumulator->getType(),
+ {Accumulator, InputA, InputB, B.getInt32((int)Rotation * 90)});
+ }
+
return nullptr;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 1b7f328..85b62be 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -83,6 +83,7 @@ enum NodeType : unsigned {
// Produces the full sequence of instructions for getting the thread pointer
// offset of a variable into X0, using the TLSDesc model.
TLSDESC_CALLSEQ,
+ TLSDESC_AUTH_CALLSEQ,
ADRP, // Page address of a TargetGlobalAddress operand.
ADR, // ADR
ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 47c4c6c..f527f7e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1804,7 +1804,9 @@ class TMSystemException<bits<3> op1, string asm, list<dag> pattern>
}
class APASI : SimpleSystemI<0, (ins GPR64:$Xt), "apas", "\t$Xt">, Sched<[]> {
+ bits<5> Xt;
let Inst{20-5} = 0b0111001110000000;
+ let Inst{4-0} = Xt;
let DecoderNamespace = "APAS";
}
@@ -2768,6 +2770,8 @@ class MulHi<bits<3> opc, string asm, SDNode OpNode>
let Inst{23-21} = opc;
let Inst{20-16} = Rm;
let Inst{15} = 0;
+ let Inst{14-10} = 0b11111;
+ let Unpredictable{14-10} = 0b11111;
let Inst{9-5} = Rn;
let Inst{4-0} = Rd;
@@ -4920,6 +4924,8 @@ class LoadExclusivePair<bits<2> sz, bit o2, bit L, bit o1, bit o0,
bits<5> Rt;
bits<5> Rt2;
bits<5> Rn;
+ let Inst{20-16} = 0b11111;
+ let Unpredictable{20-16} = 0b11111;
let Inst{14-10} = Rt2;
let Inst{9-5} = Rn;
let Inst{4-0} = Rt;
@@ -4935,6 +4941,7 @@ class BaseLoadStoreExclusiveLSUI<bits<2> sz, bit L, bit o0,
let Inst{31-30} = sz;
let Inst{29-23} = 0b0010010;
let Inst{22} = L;
+ let Inst{21} = 0b0;
let Inst{15} = o0;
}
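The added Unpredictable bits mirror the fixed Inst bits: encodings that violate them are CONSTRAINED UNPREDICTABLE rather than invalid, so the generated disassembler still decodes them but flags a soft failure instead of rejecting outright. A conceptual sketch of that check for the MulHi case (hand-written illustration, not the generated decoder):

```cpp
#include <cstdint>

enum class DecodeStatus { Fail, SoftFail, Success }; // mirrors MCDisassembler

// MulHi fixes Inst{14-10} (the unused Ra field) to 0b11111 and marks the
// same bits Unpredictable: a word with any other value there still decodes,
// but is reported as SoftFail so tools can warn about it.
DecodeStatus checkMulHiRaField(uint32_t Insn) {
  const uint32_t Ra = (Insn >> 10) & 0x1F; // Inst{14-10}
  return Ra == 0x1F ? DecodeStatus::Success : DecodeStatus::SoftFail;
}
```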
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index b37f4a0..c6f5cdc 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -381,9 +381,6 @@ def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;
def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
-def UseUnaryUndefPseudos
- : Predicate<"!(Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p2() || Subtarget->hasSME2p2()))">;
-
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisInt<1>]>>;
@@ -886,6 +883,9 @@ def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
SDT_AArch64TLSDescCallSeq,
[SDNPOutGlue, SDNPHasChain, SDNPVariadic]>;
+def AArch64tlsdesc_auth_callseq : SDNode<"AArch64ISD::TLSDESC_AUTH_CALLSEQ",
+ SDT_AArch64TLSDescCallSeq,
+ [SDNPOutGlue, SDNPHasChain, SDNPVariadic]>;
def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
SDT_AArch64WrapperLarge>;
@@ -3315,8 +3315,16 @@ def TLSDESC_CALLSEQ
: Pseudo<(outs), (ins i64imm:$sym),
[(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
+let isCall = 1, Defs = [NZCV, LR, X0, X16], hasSideEffects = 1, Size = 16,
+ isCodeGenOnly = 1 in
+def TLSDESC_AUTH_CALLSEQ
+ : Pseudo<(outs), (ins i64imm:$sym),
+ [(AArch64tlsdesc_auth_callseq tglobaltlsaddr:$sym)]>,
+ Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
(TLSDESC_CALLSEQ texternalsym:$sym)>;
+def : Pat<(AArch64tlsdesc_auth_callseq texternalsym:$sym),
+ (TLSDESC_AUTH_CALLSEQ texternalsym:$sym)>;
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
@@ -5115,22 +5123,6 @@ let Predicates = [HasFullFP16] in {
//===----------------------------------------------------------------------===//
defm FCVT : FPConversion<"fcvt">;
-// Helper to get bf16 into fp32.
-def cvt_bf16_to_fp32 :
- OutPatFrag<(ops node:$Rn),
- (f32 (COPY_TO_REGCLASS
- (i32 (UBFMWri
- (i32 (COPY_TO_REGCLASS (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
- node:$Rn, hsub), GPR32)),
- (i64 (i32shift_a (i64 16))),
- (i64 (i32shift_b (i64 16))))),
- FPR32))>;
-// Pattern for bf16 -> fp32.
-def : Pat<(f32 (any_fpextend (bf16 FPR16:$Rn))),
- (cvt_bf16_to_fp32 FPR16:$Rn)>;
-// Pattern for bf16 -> fp64.
-def : Pat<(f64 (any_fpextend (bf16 FPR16:$Rn))),
- (FCVTDSr (f32 (cvt_bf16_to_fp32 FPR16:$Rn)))>;
//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
@@ -8325,8 +8317,6 @@ def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>
def : Pat<(v2i64 (sext (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (zext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
-// Vector bf16 -> fp32 is implemented morally as a zext + shift.
-def : Pat<(v4f32 (any_fpextend (v4bf16 V64:$Rn))), (SHLLv4i16 V64:$Rn)>;
// Also match an extend from the upper half of a 128 bit source register.
def : Pat<(v8i16 (anyext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
(USHLLv16i8_shift V128:$Rn, (i32 0))>;
diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
index a290a51..c3bc70a 100644
--- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
@@ -144,20 +144,20 @@ void AArch64PointerAuth::signLR(MachineFunction &MF,
// No SEH opcode for this one; it doesn't materialize into an
// instruction on Windows.
if (MFnI.branchProtectionPAuthLR() && Subtarget->hasPAuthLR()) {
+ emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup, EmitCFI);
BuildMI(MBB, MBBI, DL,
TII->get(MFnI.shouldSignWithBKey() ? AArch64::PACIBSPPC
: AArch64::PACIASPPC))
.setMIFlag(MachineInstr::FrameSetup)
->setPreInstrSymbol(MF, MFnI.getSigningInstrLabel());
- emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup, EmitCFI);
} else {
BuildPACM(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup);
+ emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup, EmitCFI);
BuildMI(MBB, MBBI, DL,
TII->get(MFnI.shouldSignWithBKey() ? AArch64::PACIBSP
: AArch64::PACIASP))
.setMIFlag(MachineInstr::FrameSetup)
->setPreInstrSymbol(MF, MFnI.getSigningInstrLabel());
- emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup, EmitCFI);
}
if (!EmitCFI && NeedsWinCFI) {
@@ -212,19 +212,19 @@ void AArch64PointerAuth::authenticateLR(
if (MFnI->branchProtectionPAuthLR() && Subtarget->hasPAuthLR()) {
assert(PACSym && "No PAC instruction to refer to");
emitPACSymOffsetIntoX16(*TII, MBB, MBBI, DL, PACSym);
+ emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy,
+ EmitAsyncCFI);
BuildMI(MBB, MBBI, DL,
TII->get(UseBKey ? AArch64::AUTIBSPPCi : AArch64::AUTIASPPCi))
.addSym(PACSym)
.setMIFlag(MachineInstr::FrameDestroy);
- emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy,
- EmitAsyncCFI);
} else {
BuildPACM(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, PACSym);
+ emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy,
+ EmitAsyncCFI);
BuildMI(MBB, MBBI, DL,
TII->get(UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP))
.setMIFlag(MachineInstr::FrameDestroy);
- emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy,
- EmitAsyncCFI);
}
if (NeedsWinCFI) {
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 8b8d73d..aee54ed 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -979,8 +979,7 @@ defm FSCALE_2ZZ : sme2_fp_sve_destructive_vector_vg2_single<"fscale", 0b001100
defm FSCALE_4ZZ : sme2_fp_sve_destructive_vector_vg4_single<"fscale", 0b0011000>;
defm FSCALE_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fscale", 0b0011000>;
defm FSCALE_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"fscale", 0b0011000>;
-
-} // [HasSME2, HasFP8]
+}
let Predicates = [HasSME2, HasFAMINMAX] in {
defm FAMAX_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"famax", 0b0010100>;
@@ -988,17 +987,16 @@ defm FAMIN_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"famin", 0b0010101>;
defm FAMAX_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famax", 0b0010100>;
defm FAMIN_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famin", 0b0010101>;
-} //[HasSME2, HasFAMINMAX]
-
+}
let Predicates = [HasSME_LUTv2] in {
defm MOVT_TIZ : sme2_movt_zt_to_zt<"movt", 0b0011111, int_aarch64_sme_write_lane_zt, int_aarch64_sme_write_zt>;
def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">;
-} //[HasSME_LUTv2]
+}
let Predicates = [HasSME2p1, HasSME_LUTv2] in {
def LUTI4_S_4ZZT2Z : sme2_luti4_vector_vg4_strided<0b00, 0b00, "luti4">;
-} //[HasSME2p1, HasSME_LUTv2]
+}
let Predicates = [HasSMEF8F16] in {
defm FVDOT_VG2_M2ZZI_BtoH : sme2_fp8_fdot_index_za16_vg1x2<"fvdot", 0b110, int_aarch64_sme_fp8_fvdot_lane_za16_vg1x2>;
@@ -1014,17 +1012,15 @@ defm FMLAL_MZZI_BtoH : sme2_fp8_fmlal_index_za16<"fmlal", int_aarch64_
defm FMLAL_VG2_M2ZZI_BtoH : sme2_fp8_fmlal_index_za16_vgx2<"fmlal", int_aarch64_sme_fp8_fmlal_lane_za16_vg2x2>;
defm FMLAL_VG4_M4ZZI_BtoH : sme2_fp8_fmlal_index_za16_vgx4<"fmlal", int_aarch64_sme_fp8_fmlal_lane_za16_vg2x4>;
-// FP8 FMLAL (single)
defm FMLAL_VG2_MZZ_BtoH : sme2_fp8_fmlal_single_za16<"fmlal", int_aarch64_sme_fp8_fmlal_single_za16_vg2x1>;
-defm FMLAL_VG2_M2ZZ_BtoH : sme2_fp_mla_long_array_vg2_single<"fmlal", 0b001, MatrixOp16, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_fp8_fmlal_single_za16_vg2x2, [FPMR, FPCR]>;
+defm FMLAL_VG2_M2ZZ_BtoH : sme2_fp_mla_long_array_vg2_single<"fmlal", 0b001, MatrixOp16, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_fp8_fmlal_single_za16_vg2x2, [FPMR, FPCR]>;
defm FMLAL_VG4_M4ZZ_BtoH : sme2_fp_mla_long_array_vg4_single<"fmlal", 0b001, MatrixOp16, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_fp8_fmlal_single_za16_vg2x4, [FPMR, FPCR]>;
-// FP8 FMLALL (multi)
defm FMLAL_VG2_M2Z2Z_BtoH : sme2_fp_mla_long_array_vg2_multi<"fmlal", 0b100, MatrixOp16, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_fp8_fmlal_multi_za16_vg2x2, [FPMR, FPCR]>;
defm FMLAL_VG4_M4Z4Z_BtoH : sme2_fp_mla_long_array_vg4_multi<"fmlal", 0b100, MatrixOp16, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_fp8_fmlal_multi_za16_vg2x4, [FPMR, FPCR]>;
defm FMOPA_MPPZZ_BtoH : sme2_fp8_fmopa_za16<"fmopa", int_aarch64_sme_fp8_fmopa_za16>;
-} //[HasSMEF8F16]
+}
let Predicates = [HasSMEF8F32] in {
defm FDOT_VG2_M2ZZI_BtoS : sme2_fp8_fdot_index_za32_vg1x2<"fdot", int_aarch64_sme_fp8_fdot_lane_za32_vg1x2>;
@@ -1042,17 +1038,15 @@ defm FMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"fmlall", 0b01, 0b0
defm FMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"fmlall", 0b10, 0b100, int_aarch64_sme_fp8_fmlall_lane_za32_vg4x2, [FPMR, FPCR]>;
defm FMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"fmlall", 0b00, 0b1000, int_aarch64_sme_fp8_fmlall_lane_za32_vg4x4, [FPMR, FPCR]>;
-// FP8 FMLALL (single)
defm FMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"fmlall", 0b01000, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_fp8_fmlall_single_za32_vg4x1, [FPMR, FPCR]>;
defm FMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"fmlall", 0b000001, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_fp8_fmlall_single_za32_vg4x2, [FPMR, FPCR]>;
defm FMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"fmlall", 0b010001, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_fp8_fmlall_single_za32_vg4x4, [FPMR, FPCR]>;
-// FP8 FMLALL (multi)
defm FMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"fmlall", 0b01000, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_fp8_fmlall_multi_za32_vg4x2, [FPMR, FPCR]>;
defm FMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"fmlall", 0b01000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_fp8_fmlall_multi_za32_vg4x4, [FPMR, FPCR]>;
defm FMOPA_MPPZZ_BtoS : sme2_fp8_fmopa_za32<"fmopa", int_aarch64_sme_fp8_fmopa_za32>;
-} //[HasSMEF8F32]
+}
let Predicates = [HasSME2, HasSVEBFSCALE] in {
defm BFSCALE : sme2_bfscale_single<"bfscale">;
@@ -1077,31 +1071,31 @@ let Predicates = [HasSME2p2] in {
defm FMOP4A : sme2_fmop4as_fp16_fp32_widening<0, "fmop4a">;
defm FMOP4S : sme2_fmop4as_fp16_fp32_widening<1, "fmop4s">;
-} // [HasSME2p2]
+}
let Predicates = [HasSME2p2, HasSMEB16B16] in {
def BFTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b11001, ZZ_h_mul_r, ZPR16, "bftmopa">;
-} // [HasSME2p2, HasSMEB16B16]
+}
let Predicates = [HasSME2p2, HasSMEF8F32], Uses = [FPMR, FPCR] in {
def FTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01000, ZZ_b_mul_r, ZPR8, "ftmopa">;
-} // [HasSME2p2, HasSMEF8F32], Uses = [FPMR, FPCR]
+}
let Predicates = [HasSME2p2, HasSMEF8F16], Uses = [FPMR, FPCR] in {
def FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, "ftmopa">;
defm FMOP4A : sme2_fmop4a_fp8_fp16_2way<"fmop4a">;
-} // [HasSME2p2, HasSMEF8F16], Uses = [FPMR, FPCR]
+}
let Predicates = [HasSME2p2, HasSMEF16F16] in {
def FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, "ftmopa">;
defm FMOP4A : sme2_fmop4as_fp16_non_widening<0, "fmop4a">;
defm FMOP4S : sme2_fmop4as_fp16_non_widening<1, "fmop4s">;
-} // [HasSME2p2, HasSMEF16F16]
+}
let Predicates = [HasSME2, HasSVEBFSCALE] in {
defm BFMUL : sme2_bfmul_single<"bfmul">;
defm BFMUL : sme2_bfmul_multi<"bfmul">;
-} //[HasSME2, HasSVEBFSCALE]
+}
let Uses = [FPMR, FPCR] in {
let Predicates = [HasSME2p2, HasSMEF8F32] in {
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c8892de..7dd6d49 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -675,14 +675,6 @@ let Predicates = [HasSVEorSME] in {
defm FABS_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b100, "fabs", AArch64fabs_mt>;
defm FNEG_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b101, "fneg", AArch64fneg_mt>;
- let Predicates = [HasSVEorSME, UseUnaryUndefPseudos] in {
- defm FABS_ZPmZ : sve_fp_un_pred_arit_hsd<AArch64fabs_mt>;
- defm FNEG_ZPmZ : sve_fp_un_pred_arit_hsd<AArch64fneg_mt>;
-
- defm ABS_ZPmZ : sve_int_un_pred_arit_bhsd<AArch64abs_mt>;
- defm NEG_ZPmZ : sve_int_un_pred_arit_bhsd<AArch64neg_mt>;
- }
-
foreach VT = [nxv2bf16, nxv4bf16, nxv8bf16] in {
// No dedicated instruction, so just clear the sign bit.
def : Pat<(VT (fabs VT:$op)),
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
index 737fc73..e23daec 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
@@ -512,6 +512,12 @@ def N2Write_8c_3L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
let NumMicroOps = 7;
}
+def N2Write_7c_7V0 : SchedWriteRes<[N2UnitV0]> {
+ let Latency = 7;
+ let NumMicroOps = 7;
+ let ReleaseAtCycles = [7];
+}
+
//===----------------------------------------------------------------------===//
// Define generic 8 micro-op types
@@ -548,6 +554,15 @@ def N2Write_9c_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
}
//===----------------------------------------------------------------------===//
+// Define generic 9 micro-op types
+
+def N2Write_9c_9V0 : SchedWriteRes<[N2UnitV0]> {
+ let Latency = 9;
+ let NumMicroOps = 9;
+ let ReleaseAtCycles = [9];
+}
+
+//===----------------------------------------------------------------------===//
// Define generic 10 micro-op types
def N2Write_7c_5L01_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
@@ -557,6 +572,12 @@ def N2Write_7c_5L01_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
let NumMicroOps = 10;
}
+def N2Write_10c_10V0 : SchedWriteRes<[N2UnitV0]> {
+ let Latency = 10;
+ let NumMicroOps = 10;
+ let ReleaseAtCycles = [10];
+}
+
//===----------------------------------------------------------------------===//
// Define generic 12 micro-op types
@@ -580,6 +601,21 @@ def N2Write_7c_5L01_5S_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
let NumMicroOps = 15;
}
+def N2Write_15c_15V0 : SchedWriteRes<[N2UnitV0]> {
+ let Latency = 15;
+ let NumMicroOps = 15;
+ let ReleaseAtCycles = [15];
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 16 micro-op types
+
+def N2Write_16c_16V0 : SchedWriteRes<[N2UnitV0]> {
+ let Latency = 16;
+ let NumMicroOps = 16;
+ let ReleaseAtCycles = [16];
+}
+
//===----------------------------------------------------------------------===//
// Define generic 18 micro-op types
@@ -795,22 +831,26 @@ def : SchedAlias<WriteF, N2Write_2c_1V>;
// FP compare
def : SchedAlias<WriteFCmp, N2Write_2c_1V0>;
+// FP divide and square root operations are performed using an iterative
+// algorithm and block subsequent similar operations to the same pipeline
+// until complete (Arm Neoverse N2 Software Optimization Guide, 3.14).
+
// FP divide, square root
-def : SchedAlias<WriteFDiv, N2Write_7c_1V0>;
+def : SchedAlias<WriteFDiv, N2Write_7c_7V0>;
// FP divide, H-form
-def : InstRW<[N2Write_7c_1V0], (instrs FDIVHrr)>;
+def : InstRW<[N2Write_7c_7V0], (instrs FDIVHrr)>;
// FP divide, S-form
-def : InstRW<[N2Write_10c_1V0], (instrs FDIVSrr)>;
+def : InstRW<[N2Write_10c_10V0], (instrs FDIVSrr)>;
// FP divide, D-form
-def : InstRW<[N2Write_15c_1V0], (instrs FDIVDrr)>;
+def : InstRW<[N2Write_15c_15V0], (instrs FDIVDrr)>;
// FP square root, H-form
-def : InstRW<[N2Write_7c_1V0], (instrs FSQRTHr)>;
+def : InstRW<[N2Write_7c_7V0], (instrs FSQRTHr)>;
// FP square root, S-form
-def : InstRW<[N2Write_9c_1V0], (instrs FSQRTSr)>;
+def : InstRW<[N2Write_9c_9V0], (instrs FSQRTSr)>;
// FP square root, D-form
-def : InstRW<[N2Write_16c_1V0], (instrs FSQRTDr)>;
+def : InstRW<[N2Write_16c_16V0], (instrs FSQRTDr)>;
// FP multiply
def : WriteRes<WriteFMul, [N2UnitV]> { let Latency = 3; }
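The new write classes pair each latency with an equal ReleaseAtCycles, modeling the non-pipelined iterative divider: the V0 pipe stays busy for the whole operation, so back-to-back divides issue one per N cycles instead of one per cycle. A toy model of the throughput consequence (illustration only):

```cpp
// With Occupancy == Latency (the new model), N independent FDIVs on V0
// finish at cycle (N-1)*Occupancy + Latency; the old model (Occupancy 1)
// had them finish at cycle (N-1) + Latency, overstating throughput.
int lastDivDoneCycle(int NumDivs, int Latency, int Occupancy) {
  int Issue = 0, Done = 0;
  for (int I = 0; I < NumDivs; ++I) {
    Done = Issue + Latency; // this divide's result is ready
    Issue += Occupancy;     // V0 is blocked until then
  }
  return Done;
}
// lastDivDoneCycle(4, 7, 7) == 28, vs. lastDivDoneCycle(4, 7, 1) == 10.
```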
diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index f22e024..355a9d2 100644
--- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -42,10 +42,7 @@ def HasCONTEXTIDREL2
//===----------------------------------------------------------------------===//
class AT<string name, bits<3> op1, bits<4> crn, bits<4> crm,
- bits<3> op2> : SearchableTable {
- let SearchableFields = ["Name", "Encoding"];
- let EnumValueField = "Encoding";
-
+ bits<3> op2> {
string Name = name;
bits<14> Encoding;
let Encoding{13-11} = op1;
@@ -55,6 +52,27 @@ class AT<string name, bits<3> op1, bits<4> crn, bits<4> crm,
code Requires = [{ {} }];
}
+def ATValues : GenericEnum {
+ let FilterClass = "AT";
+ let NameField = "Name";
+ let ValueField = "Encoding";
+}
+
+def ATsList : GenericTable {
+ let FilterClass = "AT";
+ let Fields = ["Name", "Encoding", "Requires"];
+}
+
+def lookupATByName : SearchIndex {
+ let Table = ATsList;
+ let Key = ["Name"];
+}
+
+def lookupATByEncoding : SearchIndex {
+ let Table = ATsList;
+ let Key = ["Encoding"];
+}
+
def : AT<"S1E1R", 0b000, 0b0111, 0b1000, 0b000>;
def : AT<"S1E2R", 0b100, 0b0111, 0b1000, 0b000>;
def : AT<"S1E3R", 0b110, 0b0111, 0b1000, 0b000>;
@@ -82,14 +100,32 @@ def : AT<"S1E3A", 0b110, 0b0111, 0b1001, 0b010>;
// DMB/DSB (data barrier) instruction options.
//===----------------------------------------------------------------------===//
-class DB<string name, bits<4> encoding> : SearchableTable {
- let SearchableFields = ["Name", "Encoding"];
- let EnumValueField = "Encoding";
-
+class DB<string name, bits<4> encoding> {
string Name = name;
bits<4> Encoding = encoding;
}
+def DBValues : GenericEnum {
+ let FilterClass = "DB";
+ let NameField = "Name";
+ let ValueField = "Encoding";
+}
+
+def DBsList : GenericTable {
+ let FilterClass = "DB";
+ let Fields = ["Name", "Encoding"];
+}
+
+def lookupDBByName : SearchIndex {
+ let Table = DBsList;
+ let Key = ["Name"];
+}
+
+def lookupDBByEncoding : SearchIndex {
+ let Table = DBsList;
+ let Key = ["Encoding"];
+}
+
def : DB<"oshld", 0x1>;
def : DB<"oshst", 0x2>;
def : DB<"osh", 0x3>;
@@ -103,16 +139,39 @@ def : DB<"ld", 0xd>;
def : DB<"st", 0xe>;
def : DB<"sy", 0xf>;
-class DBnXS<string name, bits<4> encoding, bits<5> immValue> : SearchableTable {
- let SearchableFields = ["Name", "Encoding", "ImmValue"];
- let EnumValueField = "Encoding";
-
+class DBnXS<string name, bits<4> encoding, bits<5> immValue> {
string Name = name;
bits<4> Encoding = encoding;
bits<5> ImmValue = immValue;
code Requires = [{ {AArch64::FeatureXS} }];
}
+def DBnXSValues : GenericEnum {
+ let FilterClass = "DBnXS";
+ let NameField = "Name";
+ let ValueField = "Encoding";
+}
+
+def DBnXSsList : GenericTable {
+ let FilterClass = "DBnXS";
+ let Fields = ["Name", "Encoding", "ImmValue", "Requires"];
+}
+
+def lookupDBnXSByName : SearchIndex {
+ let Table = DBnXSsList;
+ let Key = ["Name"];
+}
+
+def lookupDBnXSByEncoding : SearchIndex {
+ let Table = DBnXSsList;
+ let Key = ["Encoding"];
+}
+
+def lookupDBnXSByImmValue : SearchIndex {
+ let Table = DBnXSsList;
+ let Key = ["ImmValue"];
+}
+
def : DBnXS<"oshnxs", 0x3, 0x10>;
def : DBnXS<"nshnxs", 0x7, 0x14>;
def : DBnXS<"ishnxs", 0xb, 0x18>;
@@ -123,10 +182,7 @@ def : DBnXS<"synxs", 0xf, 0x1c>;
//===----------------------------------------------------------------------===//
class DC<string name, bits<3> op1, bits<4> crn, bits<4> crm,
- bits<3> op2> : SearchableTable {
- let SearchableFields = ["Name", "Encoding"];
- let EnumValueField = "Encoding";
-
+ bits<3> op2> {
string Name = name;
bits<14> Encoding;
let Encoding{13-11} = op1;
@@ -136,6 +192,27 @@ class DC<string name, bits<3> op1, bits<4> crn, bits<4> crm,
code Requires = [{ {} }];
}
+def DCValues : GenericEnum {
+ let FilterClass = "DC";
+ let NameField = "Name";
+ let ValueField = "Encoding";
+}
+
+def DCsList : GenericTable {
+ let FilterClass = "DC";
+ let Fields = ["Name", "Encoding", "Requires"];
+}
+
+def lookupDCByName : SearchIndex {
+ let Table = DCsList;
+ let Key = ["Name"];
+}
+
+def lookupDCByEncoding : SearchIndex {
+ let Table = DCsList;
+ let Key = ["Encoding"];
+}
+
def : DC<"ZVA", 0b011, 0b0111, 0b0100, 0b001>;
def : DC<"IVAC", 0b000, 0b0111, 0b0110, 0b001>;
def : DC<"ISW", 0b000, 0b0111, 0b0110, 0b010>;
@@ -193,10 +270,7 @@ def : DC<"CGDVAOC", 0b011, 0b0111, 0b1011, 0b111>;
//===----------------------------------------------------------------------===//
class IC<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2,
- bit needsreg> : SearchableTable {
- let SearchableFields = ["Name", "Encoding"];
- let EnumValueField = "Encoding";
-
+ bit needsreg> {
string Name = name;
bits<14> Encoding;
let Encoding{13-11} = op1;
@@ -206,6 +280,27 @@ class IC<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2,
bit NeedsReg = needsreg;
}
+def ICValues : GenericEnum {
+ let FilterClass = "IC";
+ let NameField = "Name";
+ let ValueField = "Encoding";
+}
+
+def ICsList : GenericTable {
+ let FilterClass = "IC";
+ let Fields = ["Name", "Encoding", "NeedsReg"];
+}
+
+def lookupICByName : SearchIndex {
+ let Table = ICsList;
+ let Key = ["Name"];
+}
+
+def lookupICByEncoding : SearchIndex {
+ let Table = ICsList;
+ let Key = ["Encoding"];
+}
+
def : IC<"IALLUIS", 0b000, 0b0111, 0b0001, 0b000, 0>;
def : IC<"IALLU", 0b000, 0b0111, 0b0101, 0b000, 0>;
def : IC<"IVAU", 0b011, 0b0111, 0b0101, 0b001, 1>;
@@ -214,25 +309,40 @@ def : IC<"IVAU", 0b011, 0b0111, 0b0101, 0b001, 1>;
// ISB (instruction-fetch barrier) instruction options.
//===----------------------------------------------------------------------===//
-class ISB<string name, bits<4> encoding> : SearchableTable{
- let SearchableFields = ["Name", "Encoding"];
- let EnumValueField = "Encoding";
-
+class ISB<string name, bits<4> encoding> {
string Name = name;
bits<4> Encoding;
let Encoding = encoding;
}
+def ISBValues : GenericEnum {
+ let FilterClass = "ISB";
+ let NameField = "Name";
+ let ValueField = "Encoding";
+}
+
+def ISBsList : GenericTable {
+ let FilterClass = "ISB";
+ let Fields = ["Name", "Encoding"];
+}
+
+def lookupISBByName : SearchIndex {
+ let Table = ISBsList;
+ let Key = ["Name"];
+}
+
+def lookupISBByEncoding : SearchIndex {
+ let Table = ISBsList;
+ let Key = ["Encoding"];
+}
+
def : ISB<"sy", 0xf>;
//===----------------------------------------------------------------------===//
// TSB (Trace synchronization barrier) instruction options.
//===----------------------------------------------------------------------===//
-class TSB<string name, bits<4> encoding> : SearchableTable{
- let SearchableFields = ["Name", "Encoding"];
- let EnumValueField = "Encoding";
-
+class TSB<string name, bits<4> encoding> {
string Name = name;
bits<4> Encoding;
let Encoding = encoding;
@@ -240,6 +350,27 @@ class TSB<string name, bits<4> encoding> : SearchableTable{
code Requires = [{ {AArch64::FeatureTRACEV8_4} }];
}
+def TSBValues : GenericEnum {
+ let FilterClass = "TSB";
+ let NameField = "Name";
+ let ValueField = "Encoding";
+}
+
+def TSBsList : GenericTable {
+ let FilterClass = "TSB";
+ let Fields = ["Name", "Encoding", "Requires"];
+}
+
+def lookupTSBByName : SearchIndex {
+ let Table = TSBsList;
+ let Key = ["Name"];
+}
+
+def lookupTSBByEncoding : SearchIndex {
+ let Table = TSBsList;
+ let Key = ["Encoding"];
+}
+
def : TSB<"csync", 0>;
//===----------------------------------------------------------------------===//
@@ -248,10 +379,7 @@ def : TSB<"csync", 0>;
class PRFM<string type, bits<2> type_encoding,
string target, bits<2> target_encoding,
- string policy, bits<1> policy_encoding> : SearchableTable {
- let SearchableFields = ["Name", "Encoding"];
- let EnumValueField = "Encoding";
-
+ string policy, bits<1> policy_encoding> {
string Name = type # target # policy;
bits<5> Encoding;
let Encoding{4-3} = type_encoding;
@@ -261,6 +389,27 @@ class PRFM<string type, bits<2> type_encoding,
code Requires = [{ {} }];
}
+def PRFMValues : GenericEnum {
+ let FilterClass = "PRFM";
+ let NameField = "Name";
+ let ValueField = "Encoding";
+}
+
+def PRFMsList : GenericTable {
+ let FilterClass = "PRFM";
+ let Fields = ["Name", "Encoding", "Requires"];
+}
+
+def lookupPRFMByName : SearchIndex {
+ let Table = PRFMsList;
+ let Key = ["Name"];
+}
+
+def lookupPRFMByEncoding : SearchIndex {
+ let Table = PRFMsList;
+ let Key = ["Encoding"];
+}
+
def : PRFM<"pld", 0b00, "l1", 0b00, "keep", 0b0>;
def : PRFM<"pld", 0b00, "l1", 0b00, "strm", 0b1>;
def : PRFM<"pld", 0b00, "l2", 0b01, "keep", 0b0>;
@@ -296,16 +445,34 @@ def : PRFM<"pst", 0b10, "slc", 0b11, "strm", 0b1>;
// SVE Prefetch instruction options.
//===----------------------------------------------------------------------===//
-class SVEPRFM<string name, bits<4> encoding> : SearchableTable {
- let SearchableFields = ["Name", "Encoding"];
- let EnumValueField = "Encoding";
-
+class SVEPRFM<string name, bits<4> encoding> {
string Name = name;
bits<4> Encoding;
let Encoding = encoding;
code Requires = [{ {} }];
}
+def SVEPRFMValues : GenericEnum {
+ let FilterClass = "SVEPRFM";
+ let NameField = "Name";
+ let ValueField = "Encoding";
+}
+
+def SVEPRFMsList : GenericTable {
+ let FilterClass = "SVEPRFM";
+ let Fields = ["Name", "Encoding", "Requires"];
+}
+
+def lookupSVEPRFMByName : SearchIndex {
+ let Table = SVEPRFMsList;
+ let Key = ["Name"];
+}
+
+def lookupSVEPRFMByEncoding : SearchIndex {
+ let Table = SVEPRFMsList;
+ let Key = ["Encoding"];
+}
+
let Requires = [{ {AArch64::FeatureSVE} }] in {
def : SVEPRFM<"pldl1keep", 0x00>;
def : SVEPRFM<"pldl1strm", 0x01>;
@@ -325,10 +492,7 @@ def : SVEPRFM<"pstl3strm", 0x0d>;
// RPRFM (prefetch) instruction options.
//===----------------------------------------------------------------------===//
-class RPRFM<string name, bits<1> type_encoding, bits<5> policy_encoding> : SearchableTable {
- let SearchableFields = ["Name", "Encoding"];
- let EnumValueField = "Encoding";
-
+class RPRFM<string name, bits<1> type_encoding, bits<5> policy_encoding> {
string Name = name;
bits<6> Encoding;
let Encoding{0} = type_encoding;
@@ -336,6 +500,27 @@ class RPRFM<string name, bits<1> type_encoding, bits<5> policy_encoding> : Searc
code Requires = [{ {} }];
}
+def RPRFMValues : GenericEnum {
+ let FilterClass = "RPRFM";
+ let NameField = "Name";
+ let ValueField = "Encoding";
+}
+
+def RPRFMsList : GenericTable {
+ let FilterClass = "RPRFM";
+ let Fields = ["Name", "Encoding", "Requires"];
+}
+
+def lookupRPRFMByName : SearchIndex {
+ let Table = RPRFMsList;
+ let Key = ["Name"];
+}
+
+def lookupRPRFMByEncoding : SearchIndex {
+ let Table = RPRFMsList;
+ let Key = ["Encoding"];
+}
+
def : RPRFM<"pldkeep", 0b0, 0b00000>;
def : RPRFM<"pstkeep", 0b1, 0b00000>;
def : RPRFM<"pldstrm", 0b0, 0b00010>;
@@ -345,15 +530,33 @@ def : RPRFM<"pststrm", 0b1, 0b00010>;
// SVE Predicate patterns
//===----------------------------------------------------------------------===//
-class SVEPREDPAT<string name, bits<5> encoding> : SearchableTable {
- let SearchableFields = ["Name", "Encoding"];
- let EnumValueField = "Encoding";
-
+class SVEPREDPAT<string name, bits<5> encoding> {
string Name = name;
bits<5> Encoding;
let Encoding = encoding;
}
+def SVEPREDPATValues : GenericEnum {
+ let FilterClass = "SVEPREDPAT";
+ let NameField = "Name";
+ let ValueField = "Encoding";
+}
+
+def SVEPREDPATsList : GenericTable {
+ let FilterClass = "SVEPREDPAT";
+ let Fields = ["Name", "Encoding"];
+}
+
+def lookupSVEPREDPATByName : SearchIndex {
+ let Table = SVEPREDPATsList;
+ let Key = ["Name"];
+}
+
+def lookupSVEPREDPATByEncoding : SearchIndex {
+ let Table = SVEPREDPATsList;
+ let Key = ["Encoding"];
+}
+
def : SVEPREDPAT<"pow2", 0x00>;
def : SVEPREDPAT<"vl1", 0x01>;
def : SVEPREDPAT<"vl2", 0x02>;
@@ -376,15 +579,33 @@ def : SVEPREDPAT<"all", 0x1f>;
// SVE Predicate-as-counter patterns
//===----------------------------------------------------------------------===//
-class SVEVECLENSPECIFIER<string name, bits<1> encoding> : SearchableTable {
- let SearchableFields = ["Name", "Encoding"];
- let EnumValueField = "Encoding";
-
+class SVEVECLENSPECIFIER<string name, bits<1> encoding> {
string Name = name;
bits<1> Encoding;
let Encoding = encoding;
}
+def SVEVECLENSPECIFIERValues : GenericEnum {
+ let FilterClass = "SVEVECLENSPECIFIER";
+ let NameField = "Name";
+ let ValueField = "Encoding";
+}
+
+def SVEVECLENSPECIFIERsList : GenericTable {
+ let FilterClass = "SVEVECLENSPECIFIER";
+ let Fields = ["Name", "Encoding"];
+}
+
+def lookupSVEVECLENSPECIFIERByName : SearchIndex {
+ let Table = SVEVECLENSPECIFIERsList;
+ let Key = ["Name"];
+}
+
+def lookupSVEVECLENSPECIFIERByEncoding : SearchIndex {
+ let Table = SVEVECLENSPECIFIERsList;
+ let Key = ["Encoding"];
+}
+
def : SVEVECLENSPECIFIER<"vlx2", 0x0>;
def : SVEVECLENSPECIFIER<"vlx4", 0x1>;
@@ -395,15 +616,28 @@ def : SVEVECLENSPECIFIER<"vlx4", 0x1>;
// is used for a few instructions that only accept a limited set of exact FP
// immediate values.
//===----------------------------------------------------------------------===//
-class ExactFPImm<string name, string repr, bits<4> enum > : SearchableTable {
- let SearchableFields = ["Enum", "Repr"];
- let EnumValueField = "Enum";
-
+class ExactFPImm<string name, string repr, bits<4> enum > {
string Name = name;
bits<4> Enum = enum;
string Repr = repr;
}
+def ExactFPImmValues : GenericEnum {
+ let FilterClass = "ExactFPImm";
+ let NameField = "Name";
+ let ValueField = "Enum";
+}
+
+def ExactFPImmsList : GenericTable {
+ let FilterClass = "ExactFPImm";
+ let Fields = ["Enum", "Repr"];
+}
+
+def lookupExactFPImmByEnum : SearchIndex {
+ let Table = ExactFPImmsList;
+ let Key = ["Enum"];
+}
+
def : ExactFPImm<"zero", "0.0", 0x0>;
def : ExactFPImm<"half", "0.5", 0x1>;
def : ExactFPImm<"one", "1.0", 0x2>;
@@ -413,10 +647,7 @@ def : ExactFPImm<"two", "2.0", 0x3>;
// PState instruction options.
//===----------------------------------------------------------------------===//
-class PStateImm0_15<string name, bits<3> op1, bits<3> op2> : SearchableTable {
- let SearchableFields = ["Name", "Encoding"];
- let EnumValueField = "Encoding";
-
+class PStateImm0_15<string name, bits<3> op1, bits<3> op2> {
string Name = name;
bits<6> Encoding;
let Encoding{5-3} = op1;
@@ -424,10 +655,28 @@ class PStateImm0_15<string name, bits<3> op1, bits<3> op2> : SearchableTable {
code Requires = [{ {} }];
}
-class PStateImm0_1<string name, bits<3> op1, bits<3> op2, bits<3> crm_high> : SearchableTable {
- let SearchableFields = ["Name", "Encoding"];
- let EnumValueField = "Encoding";
+def PStateImm0_15Values : GenericEnum {
+ let FilterClass = "PStateImm0_15";
+ let NameField = "Name";
+ let ValueField = "Encoding";
+}
+
+def PStateImm0_15sList : GenericTable {
+ let FilterClass = "PStateImm0_15";
+ let Fields = ["Name", "Encoding", "Requires"];
+}
+def lookupPStateImm0_15ByName : SearchIndex {
+ let Table = PStateImm0_15sList;
+ let Key = ["Name"];
+}
+
+def lookupPStateImm0_15ByEncoding : SearchIndex {
+ let Table = PStateImm0_15sList;
+ let Key = ["Encoding"];
+}
+
+class PStateImm0_1<string name, bits<3> op1, bits<3> op2, bits<3> crm_high> {
string Name = name;
bits<9> Encoding;
let Encoding{8-6} = crm_high;
@@ -436,6 +685,27 @@ class PStateImm0_1<string name, bits<3> op1, bits<3> op2, bits<3> crm_high> : Se
code Requires = [{ {} }];
}
+def PStateImm0_1Values : GenericEnum {
+ let FilterClass = "PStateImm0_1";
+ let NameField = "Name";
+ let ValueField = "Encoding";
+}
+
+def PStateImm0_1sList : GenericTable {
+ let FilterClass = "PStateImm0_1";
+ let Fields = ["Name", "Encoding", "Requires"];
+}
+
+def lookupPStateImm0_1ByName : SearchIndex {
+ let Table = PStateImm0_1sList;
+ let Key = ["Name"];
+}
+
+def lookupPStateImm0_1ByEncoding : SearchIndex {
+ let Table = PStateImm0_1sList;
+ let Key = ["Encoding"];
+}
+
// Name, Op1, Op2
def : PStateImm0_15<"SPSel", 0b000, 0b101>;
def : PStateImm0_15<"DAIFSet", 0b011, 0b110>;
@@ -467,16 +737,34 @@ def : PStateImm0_1<"PM", 0b001, 0b000, 0b001>;
// SVCR instruction options.
//===----------------------------------------------------------------------===//
-class SVCR<string name, bits<3> encoding> : SearchableTable {
- let SearchableFields = ["Name", "Encoding"];
- let EnumValueField = "Encoding";
-
+class SVCR<string name, bits<3> encoding> {
string Name = name;
bits<3> Encoding;
let Encoding = encoding;
code Requires = [{ {} }];
}
+def SVCRValues : GenericEnum {
+ let FilterClass = "SVCR";
+ let NameField = "Name";
+ let ValueField = "Encoding";
+}
+
+def SVCRsList : GenericTable {
+ let FilterClass = "SVCR";
+ let Fields = ["Name", "Encoding", "Requires"];
+}
+
+def lookupSVCRByName : SearchIndex {
+ let Table = SVCRsList;
+ let Key = ["Name"];
+}
+
+def lookupSVCRByEncoding : SearchIndex {
+ let Table = SVCRsList;
+ let Key = ["Encoding"];
+}
+
let Requires = [{ {AArch64::FeatureSME} }] in {
def : SVCR<"SVCRSM", 0b001>;
def : SVCR<"SVCRZA", 0b010>;
@@ -487,30 +775,66 @@ def : SVCR<"SVCRSMZA", 0b011>;
// PSB instruction options.
//===----------------------------------------------------------------------===//
-class PSB<string name, bits<5> encoding> : SearchableTable {
- let SearchableFields = ["Name", "Encoding"];
- let EnumValueField = "Encoding";
-
+class PSB<string name, bits<5> encoding> {
string Name = name;
bits<5> Encoding;
let Encoding = encoding;
}
+def PSBValues : GenericEnum {
+ let FilterClass = "PSB";
+ let NameField = "Name";
+ let ValueField = "Encoding";
+}
+
+def PSBsList : GenericTable {
+ let FilterClass = "PSB";
+ let Fields = ["Name", "Encoding"];
+}
+
+def lookupPSBByName : SearchIndex {
+ let Table = PSBsList;
+ let Key = ["Name"];
+}
+
+def lookupPSBByEncoding : SearchIndex {
+ let Table = PSBsList;
+ let Key = ["Encoding"];
+}
+
def : PSB<"csync", 0x11>;
//===----------------------------------------------------------------------===//
// BTI instruction options.
//===----------------------------------------------------------------------===//
-class BTI<string name, bits<3> encoding> : SearchableTable {
- let SearchableFields = ["Name", "Encoding"];
- let EnumValueField = "Encoding";
-
+class BTI<string name, bits<3> encoding> {
string Name = name;
bits<3> Encoding;
let Encoding = encoding;
}
+def BTIValues : GenericEnum {
+ let FilterClass = "BTI";
+ let NameField = "Name";
+ let ValueField = "Encoding";
+}
+
+def BTIsList : GenericTable {
+ let FilterClass = "BTI";
+ let Fields = ["Name", "Encoding"];
+}
+
+def lookupBTIByName : SearchIndex {
+ let Table = BTIsList;
+ let Key = ["Name"];
+}
+
+def lookupBTIByEncoding : SearchIndex {
+ let Table = BTIsList;
+ let Key = ["Encoding"];
+}
+
def : BTI<"c", 0b010>;
def : BTI<"j", 0b100>;
def : BTI<"jc", 0b110>;
@@ -667,12 +991,8 @@ defm : TLBI<"VMALLWS2E1OS", 0b100, 0b1000, 0b0101, 0b010, 0>;
//===----------------------------------------------------------------------===//
class SysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
- bits<3> op2> : SearchableTable {
- let SearchableFields = ["Name", "Encoding"];
- let EnumValueField = "Encoding";
-
+ bits<3> op2> {
string Name = name;
- string AltName = name;
bits<16> Encoding;
let Encoding{15-14} = op0;
let Encoding{13-11} = op1;
@@ -684,6 +1004,26 @@ class SysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
code Requires = [{ {} }];
}
+def SysRegValues : GenericEnum {
+ let FilterClass = "SysReg";
+ let NameField = "Name";
+ let ValueField = "Encoding";
+}
+
+def SysRegsList : GenericTable {
+ let FilterClass = "SysReg";
+ let Fields = ["Name", "Encoding", "Readable", "Writeable", "Requires"];
+
+ let PrimaryKey = ["Encoding"];
+ let PrimaryKeyName = "lookupSysRegByEncoding";
+ let PrimaryKeyReturnRange = true;
+}
+
+def lookupSysRegByName : SearchIndex {
+ let Table = SysRegsList;
+ let Key = ["Name"];
+}
+
class RWSysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
bits<3> op2>
: SysReg<name, op0, op1, crn, crm, op2> {
@@ -969,9 +1309,7 @@ def : RWSysReg<"TTBR0_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b000>;
def : RWSysReg<"TTBR0_EL3", 0b11, 0b110, 0b0010, 0b0000, 0b000>;
let Requires = [{ {AArch64::FeatureEL2VMSA} }] in {
-def : RWSysReg<"TTBR0_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000> {
- let AltName = "VSCTLR_EL2";
-}
+def : RWSysReg<"TTBR0_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000>;
def : RWSysReg<"VTTBR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b000>;
}
@@ -1358,9 +1696,7 @@ def : RWSysReg<"ICH_LR15_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b111>;
let Requires = [{ {AArch64::HasV8_0rOps} }] in {
//Virtualization System Control Register
// Op0 Op1 CRn CRm Op2
-def : RWSysReg<"VSCTLR_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000> {
- let AltName = "TTBR0_EL2";
-}
+def : RWSysReg<"VSCTLR_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000>;
//MPU Type Register
// Op0 Op1 CRn CRm Op2
@@ -2026,12 +2362,8 @@ def : RWSysReg<"ACTLRALIAS_EL1", 0b11, 0b000, 0b0001, 0b0100, 0b101>;
//===----------------------------------------------------------------------===//
class PHint<bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
- bits<3> op2, string name> : SearchableTable {
- let SearchableFields = ["Name", "Encoding"];
- let EnumValueField = "Encoding";
-
+ bits<3> op2, string name> {
string Name = name;
- string AltName = name;
bits<16> Encoding;
let Encoding{15-14} = op0;
let Encoding{13-11} = op1;
@@ -2041,6 +2373,27 @@ class PHint<bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
code Requires = [{ {} }];
}
+def PHintValues : GenericEnum {
+ let FilterClass = "PHint";
+ let NameField = "Name";
+ let ValueField = "Encoding";
+}
+
+def PHintsList : GenericTable {
+ let FilterClass = "PHint";
+ let Fields = ["Name", "Encoding", "Requires"];
+}
+
+def lookupPHintByName : SearchIndex {
+ let Table = PHintsList;
+ let Key = ["Name"];
+}
+
+def lookupPHintByEncoding : SearchIndex {
+ let Table = PHintsList;
+ let Key = ["Encoding"];
+}
+
let Requires = [{ {AArch64::FeaturePCDPHINT} }] in {
def KEEP : PHint<0b00, 0b000, 0b0000, 0b0000, 0b000, "keep">;
def STRM : PHint<0b00, 0b000, 0b0000, 0b0000, 0b001, "strm">;
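The pattern repeated throughout this file replaces the legacy SearchableTable base class with an explicit GenericEnum, a GenericTable listing the fields, and one SearchIndex per lookup key. Roughly, the -gen-searchable-tables backend turns each GenericTable/SearchIndex pair into a static array sorted by the key plus a binary-search helper. The sketch below illustrates that shape for ICsList; it is an assumption about the emitted code's form, not its exact text, and the encoding values are illustrative only.

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <iterator>

struct IC {
  const char *Name;
  uint16_t Encoding;   // assumed op1:crn:crm:op2 packing, per the IC class
  bool NeedsReg;
};

// The emitter keeps one array per index, each sorted by its key; this
// one is sorted by Name to back lookupICByName.
static const IC ICsListByName[] = {
    {"IALLU", 0x3A8, false},   // encoding values illustrative only
    {"IALLUIS", 0x388, false},
    {"IVAU", 0x1BA9 & 0xFFFF, true},
};

const IC *lookupICByName(const char *Name) {
  auto It = std::lower_bound(std::begin(ICsListByName),
                             std::end(ICsListByName), Name,
                             [](const IC &L, const char *R) {
                               return std::strcmp(L.Name, R) < 0;
                             });
  if (It == std::end(ICsListByName) || std::strcmp(It->Name, Name) != 0)
    return nullptr;
  return It;
}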
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 0566a87..25b6731 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -35,6 +35,9 @@ using namespace llvm::PatternMatch;
static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
cl::init(true), cl::Hidden);
+static cl::opt<bool> SVEPreferFixedOverScalableIfEqualCost(
+ "sve-prefer-fixed-over-scalable-if-equal", cl::Hidden);
+
static cl::opt<unsigned> SVEGatherOverhead("sve-gather-overhead", cl::init(10),
cl::Hidden);
@@ -256,7 +259,7 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
CalleeAttrs.set(SMEAttrs::SM_Enabled, true);
}
- if (CalleeAttrs.isNewZA())
+ if (CalleeAttrs.isNewZA() || CalleeAttrs.isNewZT0())
return false;
if (CallerAttrs.requiresLazySave(CalleeAttrs) ||
@@ -1635,10 +1638,8 @@ instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II) {
!match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
m_ConstantInt<AArch64SVEPredPattern::all>())))
return std::nullopt;
- IRBuilderBase::FastMathFlagGuard FMFGuard(IC.Builder);
- IC.Builder.setFastMathFlags(II.getFastMathFlags());
- auto BinOp =
- IC.Builder.CreateBinOp(BinOpCode, II.getOperand(1), II.getOperand(2));
+ auto BinOp = IC.Builder.CreateBinOpFMF(
+ BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());
return IC.replaceInstUsesWith(II, BinOp);
}
@@ -2760,6 +2761,21 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
return AdjustCost(
BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
+ static const TypeConversionCostTblEntry BF16Tbl[] = {
+ {ISD::FP_ROUND, MVT::bf16, MVT::f32, 1}, // bfcvt
+ {ISD::FP_ROUND, MVT::bf16, MVT::f64, 1}, // bfcvt
+ {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f32, 1}, // bfcvtn
+ {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f32, 2}, // bfcvtn+bfcvtn2
+ {ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 2}, // bfcvtn+fcvtn
+ {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 3}, // fcvtn+fcvtl2+bfcvtn
+ {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+bfcvtn
+ };
+
+ if (ST->hasBF16())
+ if (const auto *Entry = ConvertCostTableLookup(
+ BF16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
+ return AdjustCost(Entry->Cost);
+
static const TypeConversionCostTblEntry ConversionTbl[] = {
{ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1}, // xtn
{ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1}, // xtn
@@ -2847,6 +2863,14 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2}, // fcvtl+fcvtl
{ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3}, // fcvtl+fcvtl2+fcvtl
{ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6}, // 2 * fcvtl+fcvtl2+fcvtl
+ // BF16 (uses shift)
+ {ISD::FP_EXTEND, MVT::f32, MVT::bf16, 1}, // shl
+ {ISD::FP_EXTEND, MVT::f64, MVT::bf16, 2}, // shl+fcvt
+ {ISD::FP_EXTEND, MVT::v4f32, MVT::v4bf16, 1}, // shll
+ {ISD::FP_EXTEND, MVT::v8f32, MVT::v8bf16, 2}, // shll+shll2
+ {ISD::FP_EXTEND, MVT::v2f64, MVT::v2bf16, 2}, // shll+fcvtl
+ {ISD::FP_EXTEND, MVT::v4f64, MVT::v4bf16, 3}, // shll+fcvtl+fcvtl2
+ {ISD::FP_EXTEND, MVT::v8f64, MVT::v8bf16, 6}, // 2 * shll+fcvtl+fcvtl2
// FP Ext and trunc
{ISD::FP_ROUND, MVT::f32, MVT::f64, 1}, // fcvt
{ISD::FP_ROUND, MVT::v2f32, MVT::v2f64, 1}, // fcvtn
@@ -2859,6 +2883,15 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ISD::FP_ROUND, MVT::v2f16, MVT::v2f64, 2}, // fcvtn+fcvtn
{ISD::FP_ROUND, MVT::v4f16, MVT::v4f64, 3}, // fcvtn+fcvtn2+fcvtn
{ISD::FP_ROUND, MVT::v8f16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+fcvtn
+ // BF16 (more complex; the +bf16 case is handled above)
+ {ISD::FP_ROUND, MVT::bf16, MVT::f32, 8}, // Expansion is ~8 insns
+ {ISD::FP_ROUND, MVT::bf16, MVT::f64, 9}, // fcvtn + above
+ {ISD::FP_ROUND, MVT::v2bf16, MVT::v2f32, 8},
+ {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f32, 8},
+ {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f32, 15},
+ {ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 9},
+ {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 10},
+ {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 19},
// LowerVectorINT_TO_FP:
{ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},
@@ -4705,10 +4738,21 @@ InstructionCost AArch64TTIImpl::getShuffleCost(
}
Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);
- // Treat extractsubvector as single op permutation.
bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector;
- if (IsExtractSubvector && LT.second.isFixedLengthVector())
+ // A subvector extract can be implemented with an ext (or a trivial
+ // extract, if from lane 0). This currently only handles low or high
+ // extracts to prevent SLP vectorizer regressions.
+ if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
+ if (LT.second.is128BitVector() &&
+ cast<FixedVectorType>(SubTp)->getNumElements() ==
+ LT.second.getVectorNumElements() / 2) {
+ if (Index == 0)
+ return 0;
+ if (Index == (int)LT.second.getVectorNumElements() / 2)
+ return 1;
+ }
Kind = TTI::SK_PermuteSingleSrc;
+ }
// Check for broadcast loads, which are supported by the LD1R instruction.
// In terms of code-size, the shuffle vector is free when a load + dup get
@@ -4919,6 +4963,12 @@ static bool containsDecreasingPointers(Loop *TheLoop,
return false;
}
+bool AArch64TTIImpl::preferFixedOverScalableIfEqualCost() const {
+ if (SVEPreferFixedOverScalableIfEqualCost.getNumOccurrences())
+ return SVEPreferFixedOverScalableIfEqualCost;
+ return ST->useFixedOverScalableIfEqualCost();
+}
+
unsigned AArch64TTIImpl::getEpilogueVectorizationMinVF() const {
return ST->getEpilogueVectorizationMinVF();
}
@@ -5283,11 +5333,17 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
}
}
- // Sink vscales closer to uses for better isel
+ auto ShouldSinkCondition = [](Value *Cond) -> bool {
+ auto *II = dyn_cast<IntrinsicInst>(Cond);
+ return II && II->getIntrinsicID() == Intrinsic::vector_reduce_or &&
+ isa<ScalableVectorType>(II->getOperand(0)->getType());
+ };
+
switch (I->getOpcode()) {
case Instruction::GetElementPtr:
case Instruction::Add:
case Instruction::Sub:
+ // Sink vscales closer to uses for better isel
for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
if (shouldSinkVScale(I->getOperand(Op), Ops)) {
Ops.push_back(&I->getOperandUse(Op));
@@ -5295,6 +5351,23 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
}
}
break;
+ case Instruction::Select: {
+ if (!ShouldSinkCondition(I->getOperand(0)))
+ return false;
+
+ Ops.push_back(&I->getOperandUse(0));
+ return true;
+ }
+ case Instruction::Br: {
+ if (cast<BranchInst>(I)->isUnconditional())
+ return false;
+
+ if (!ShouldSinkCondition(cast<BranchInst>(I)->getCondition()))
+ return false;
+
+ Ops.push_back(&I->getOperandUse(0));
+ return true;
+ }
default:
break;
}
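The getShuffleCost hunk above special-cases half-width extracts from legal 128-bit vectors: the low half is free (a subregister extract) and the high half costs a single EXT. A standalone restatement of the rule with simplified inputs (a paraphrase for clarity, not the TTI code itself):

#include <optional>

// NumElts is the element count of the legalized 128-bit source vector;
// SubNumElts and Index describe the extracted subvector.
std::optional<int> extractSubvectorCost(int NumElts, int SubNumElts,
                                        int Index) {
  if (SubNumElts != NumElts / 2)
    return std::nullopt;          // fall back to a generic permute cost
  if (Index == 0)
    return 0;                     // low half: trivial subregister extract
  if (Index == NumElts / 2)
    return 1;                     // high half: one EXT instruction
  return std::nullopt;
}

// e.g. for v4i32 (NumElts = 4): extracting lanes [0,1] costs 0, lanes
// [2,3] cost 1; any other slice is costed as a single-source permute.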
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 83b86e3..214fb4e 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -387,9 +387,7 @@ public:
return TailFoldingStyle::DataWithoutLaneMask;
}
- bool preferFixedOverScalableIfEqualCost() const {
- return ST->useFixedOverScalableIfEqualCost();
- }
+ bool preferFixedOverScalableIfEqualCost() const;
unsigned getEpilogueVectorizationMinVF() const;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 4b7d415..93461e3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -454,6 +454,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{nxv2s64, p0, nxv2s64, 8},
})
.clampScalar(0, s8, s64)
+ .minScalarOrElt(0, s8)
.lowerIf([=](const LegalityQuery &Query) {
return Query.Types[0].isScalar() &&
Query.Types[0] != Query.MMODescrs[0].MemoryTy;
@@ -466,14 +467,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampMaxNumElements(0, p0, 2)
.lowerIfMemSizeNotPow2()
// TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
- .bitcastIf(typeInSet(0, {v4s8}),
+ .bitcastIf(all(typeInSet(0, {v4s8}),
+ LegalityPredicate([=](const LegalityQuery &Query) {
+ return Query.Types[0].getSizeInBits() ==
+ Query.MMODescrs[0].MemoryTy.getSizeInBits();
+ })),
[=](const LegalityQuery &Query) {
const LLT VecTy = Query.Types[0];
return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
})
.customIf(IsPtrVecPred)
.scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
- .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
+ .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
+ .lower();
getActionDefinitionsBuilder(G_INDEXED_STORE)
// Idx 0 == Ptr, Idx 1 == Val
@@ -861,6 +867,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalForCartesianProduct({s32, v2s16, v4s8})
.legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
.legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
+ .customIf([=](const LegalityQuery &Query) {
+ // Handle casts from i1 vectors to scalars.
+ LLT DstTy = Query.Types[0];
+ LLT SrcTy = Query.Types[1];
+ return DstTy.isScalar() && SrcTy.isVector() &&
+ SrcTy.getScalarSizeInBits() == 1;
+ })
.lowerIf([=](const LegalityQuery &Query) {
return Query.Types[0].isVector() != Query.Types[1].isVector();
})
@@ -1062,10 +1075,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return llvm::is_contained(
{v2s64, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
})
- // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
- // just want those lowered into G_BUILD_VECTOR
+ // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors) or scalar
+ // destinations; we just want those lowered into G_BUILD_VECTOR or
+ // G_EXTRACT_ELEMENT.
.lowerIf([=](const LegalityQuery &Query) {
- return !Query.Types[1].isVector();
+ return !Query.Types[0].isVector() || !Query.Types[1].isVector();
})
.moreElementsIf(
[](const LegalityQuery &Query) {
@@ -1404,11 +1418,28 @@ bool AArch64LegalizerInfo::legalizeCustom(
return Helper.lowerAbsToCNeg(MI);
case TargetOpcode::G_ICMP:
return legalizeICMP(MI, MRI, MIRBuilder);
+ case TargetOpcode::G_BITCAST:
+ return legalizeBitcast(MI, Helper);
}
llvm_unreachable("expected switch to return");
}
+bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
+ LegalizerHelper &Helper) const {
+ assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ // Handle casts from i1 vectors to scalars by storing the vector to a
+ // stack slot and reloading it as the scalar type.
+ if (!DstTy.isScalar() || !SrcTy.isVector() ||
+ SrcTy.getElementType() != LLT::scalar(1))
+ return false;
+
+ Helper.createStackStoreLoad(DstReg, SrcReg);
+ MI.eraseFromParent();
+ return true;
+}
+
bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder,
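legalizeBitcast above routes <N x i1> to scalar casts through a stack slot (createStackStoreLoad): the predicate vector is stored, and the same bytes are reloaded as the scalar type. A rough host-side analogy for the 8-lane case, assuming one bit per lane (the in-memory predicate layout here is an assumption for illustration):

#include <cstdint>
#include <cstring>

// Analogy for a <8 x i1> -> s8 G_BITCAST legalized via the stack:
// pack the lanes, "store" them to a slot, reload as the scalar.
uint8_t bitcastV8I1ToI8(const bool (&Lanes)[8]) {
  uint8_t Packed = 0;
  for (int I = 0; I < 8; ++I)
    Packed |= static_cast<uint8_t>(Lanes[I]) << I;
  uint8_t Slot;                                  // the stack slot
  std::memcpy(&Slot, &Packed, sizeof(Slot));     // store the vector
  uint8_t Scalar;
  std::memcpy(&Scalar, &Slot, sizeof(Scalar));   // reload as a scalar
  return Scalar;
}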
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 00d85a3..bcb29432 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -66,6 +66,7 @@ private:
LegalizerHelper &Helper) const;
bool legalizeDynStackAlloc(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizePrefetch(MachineInstr &MI, LegalizerHelper &Helper) const;
+ bool legalizeBitcast(MachineInstr &MI, LegalizerHelper &Helper) const;
const AArch64Subtarget *ST;
};
} // End llvm namespace.
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 5fe2e3c..6bba70d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -405,6 +405,19 @@ void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
MI.eraseFromParent();
}
+void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ assert(DstTy.getSizeInBits() == 128 &&
+ "Expected 128bit vector in applyFullRev");
+ MachineIRBuilder MIRBuilder(MI);
+ auto Cst = MIRBuilder.buildConstant(LLT::scalar(32), 8);
+ auto Rev = MIRBuilder.buildInstr(AArch64::G_REV64, {DstTy}, {Src});
+ MIRBuilder.buildInstr(AArch64::G_EXT, {Dst}, {Rev, Rev, Cst});
+ MI.eraseFromParent();
+}
+
bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
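applyFullRev above lowers a full 128-bit vector reverse into G_REV64 followed by G_EXT with an 8-byte immediate. A small host-side simulation of the byte-element case shows why the pair composes into a complete reverse; this models the AArch64 instruction semantics, not the GISel code:

#include <algorithm>
#include <array>
#include <cstdint>

using V16 = std::array<uint8_t, 16>;

V16 rev64(V16 V) {                 // reverse bytes within each 64-bit half
  std::reverse(V.begin(), V.begin() + 8);
  std::reverse(V.begin() + 8, V.end());
  return V;
}

V16 ext(const V16 &Lo, const V16 &Hi, int Imm) { // EXT: concatenated extract
  V16 R{};
  for (int I = 0; I < 16; ++I) {
    int Src = Imm + I;
    R[I] = Src < 16 ? Lo[Src] : Hi[Src - 16];
  }
  return R;
}

bool fullReverseWorks() {
  V16 In;
  for (int I = 0; I < 16; ++I)
    In[I] = static_cast<uint8_t>(I);
  V16 Rev = rev64(In);
  V16 Out = ext(Rev, Rev, 8);      // EXT Vd, Vn, Vn, #8
  for (int I = 0; I < 16; ++I)
    if (Out[I] != In[15 - I])
      return false;
  return true;                     // Out is In fully reversed
}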
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index ae84bc9..875b505 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -1874,26 +1874,25 @@ void AArch64InstPrinter::printBarriernXSOption(const MCInst *MI, unsigned OpNo,
markup(O, Markup::Immediate) << "#" << Val;
}
-static bool isValidSysReg(const AArch64SysReg::SysReg *Reg, bool Read,
+static bool isValidSysReg(const AArch64SysReg::SysReg &Reg, bool Read,
const MCSubtargetInfo &STI) {
- return (Reg && (Read ? Reg->Readable : Reg->Writeable) &&
- Reg->haveFeatures(STI.getFeatureBits()));
+ return (Read ? Reg.Readable : Reg.Writeable) &&
+ Reg.haveFeatures(STI.getFeatureBits());
}
-// Looks up a system register either by encoding or by name. Some system
+// Looks up a system register either by encoding. Some system
+// Looks up a system register by encoding. Some system
// registers share the same encoding between different architectures,
-// therefore a tablegen lookup by encoding will return an entry regardless
-// of the register's predication on a specific subtarget feature. To work
-// around this problem we keep an alternative name for such registers and
-// look them up by that name if the first lookup was unsuccessful.
+// so a tablegen lookup by encoding will return a range of registers with
+// the same encoding. We need to check each register in the range to see if
+// it is valid.
static const AArch64SysReg::SysReg *lookupSysReg(unsigned Val, bool Read,
const MCSubtargetInfo &STI) {
- const AArch64SysReg::SysReg *Reg = AArch64SysReg::lookupSysRegByEncoding(Val);
-
- if (Reg && !isValidSysReg(Reg, Read, STI))
- Reg = AArch64SysReg::lookupSysRegByName(Reg->AltName);
+ auto Range = AArch64SysReg::lookupSysRegByEncoding(Val);
+ for (auto &Reg : Range) {
+ if (isValidSysReg(Reg, Read, STI))
+ return &Reg;
+ }
- return Reg;
+ return nullptr;
}
void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo,
@@ -1917,7 +1916,7 @@ void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo,
const AArch64SysReg::SysReg *Reg = lookupSysReg(Val, true /*Read*/, STI);
- if (isValidSysReg(Reg, true /*Read*/, STI))
+ if (Reg)
O << Reg->Name;
else
O << AArch64SysReg::genericRegisterString(Val);
@@ -1944,7 +1943,7 @@ void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo,
const AArch64SysReg::SysReg *Reg = lookupSysReg(Val, false /*Read*/, STI);
- if (isValidSysReg(Reg, false /*Read*/, STI))
+ if (Reg)
O << Reg->Name;
else
O << AArch64SysReg::genericRegisterString(Val);
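With PrimaryKeyReturnRange = true on SysRegsList (see the tablegen change earlier in this commit), lookupSysRegByEncoding returns every entry sharing an encoding rather than a single record, which is what the new loop in lookupSysReg iterates. A sketch of the assumed shape of the generated range lookup; entry contents are abbreviated and the encoding value is illustrative:

#include <algorithm>
#include <cstdint>
#include <iterator>
#include "llvm/ADT/iterator_range.h"

struct SysReg { const char *Name; uint16_t Encoding; bool Readable, Writeable; };

// Entries sharing an encoding sit adjacently in the Encoding-sorted
// table, e.g. TTBR0_EL2 and VSCTLR_EL2 from this commit.
static const SysReg SysRegsList[] = {
    {"TTBR0_EL2", 0xE100, true, true},
    {"VSCTLR_EL2", 0xE100, true, true},
};

llvm::iterator_range<const SysReg *> lookupSysRegByEncoding(uint16_t Encoding) {
  auto Range = std::equal_range(
      std::begin(SysRegsList), std::end(SysRegsList),
      SysReg{nullptr, Encoding, false, false},
      [](const SysReg &L, const SysReg &R) { return L.Encoding < R.Encoding; });
  return llvm::make_range(Range.first, Range.second);
}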
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index a831de8..0ef862f 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -484,6 +484,7 @@ let Predicates = [HasSVEorSME] in {
//===----------------------------------------------------------------------===//
def SVEDup0 : ComplexPattern<vAny, 0, "SelectDupZero", []>;
def SVEDup0Undef : ComplexPattern<vAny, 0, "SelectDupZeroOrUndef", []>;
+def SVEAny : ComplexPattern<vAny, 0, "SelectAny", []>;
class SVE_1_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
Instruction inst>
@@ -504,10 +505,15 @@ multiclass SVE_1_Op_PassthruUndef_Pat<ValueType vtd, SDPatternOperator op, Value
(inst $Op3, $Op1, $Op2)>;
}
-class SVE_1_Op_PassthruUndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
- ValueType vts, Instruction inst>
- : Pat<(vtd (op pg:$Op1, vts:$Op2, (vtd (SVEDup0Undef)))),
- (inst $Op1, $Op2)>;
+multiclass SVE_1_Op_PassthruUndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
+ ValueType vts, Instruction inst> {
+ let AddedComplexity = 1 in {
+ def : Pat<(vtd (op pg:$Op1, vts:$Op2, (vtd (SVEDup0Undef)))),
+ (inst $Op1, $Op2)>;
+ def : Pat<(vtd (op (pg (SVEAllActive:$Op1)), vts:$Op2, (vtd (SVEAny)))),
+ (inst $Op1, $Op2)>;
+ }
+}
// Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the
// type of rounding. This is matched by timm0_1 in pattern below and ignored.
@@ -576,10 +582,15 @@ multiclass SVE_3_Op_Undef_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1
(inst $Op1, $Op2, $Op3)>;
}
-class SVE_3_Op_UndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
- ValueType vt2, ValueType vt3, Instruction inst>
- : Pat<(vtd (op (vt1 (SVEDup0Undef)), vt2:$Op1, vt3:$Op2)),
- (inst $Op1, $Op2)>;
+multiclass SVE_3_Op_UndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ ValueType vt2, ValueType vt3, Instruction inst> {
+ let AddedComplexity = 1 in {
+ def : Pat<(vtd (op (vt1 (SVEDup0Undef)), vt2:$Op1, vt3:$Op2)),
+ (inst $Op1, $Op2)>;
+ def : Pat<(vtd (op (vt1 (SVEAny)), (vt2 (SVEAllActive:$Op2)), vt3:$Op3)),
+ (inst $Op2, $Op3)>;
+ }
+}
class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, ValueType vt4,
@@ -2840,8 +2851,8 @@ multiclass sve2_fp_convert_up_long_z<string asm, string op> {
def _HtoS : sve2_fp_convert_precision<0b1001, 0b0, asm, ZPR32, ZPR16>;
def _StoD : sve2_fp_convert_precision<0b1111, 0b0, asm, ZPR64, ZPR32>;
- def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
}
multiclass sve2_fp_convert_down_narrow_z<string asm> {
@@ -3256,7 +3267,7 @@ class sve_fp_z2op_p_zd<bits<7> opc,string asm, RegisterOperand i_zprtype,
multiclass sve_fp_z2op_p_zd<string asm, SDPatternOperator op> {
def _DtoS : sve_fp_z2op_p_zd<0b0001010, asm, ZPR64, ZPR32>;
- def : SVE_3_Op_UndefZero_Pat<nxv4f32, op, nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4f32, op, nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
}
multiclass sve_fp_z2op_p_zd_hsd<bits<5> opc, string asm> {
@@ -3273,7 +3284,7 @@ multiclass sve_fp_z2op_p_zd_frint<bits<2> opc, string asm> {
multiclass sve_fp_z2op_p_zd_bfcvt<string asm, SDPatternOperator op> {
def NAME : sve_fp_z2op_p_zd<0b1001010, asm, ZPR32, ZPR16>;
- def : SVE_3_Op_UndefZero_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
}
multiclass sve_fp_z2op_p_zd_d<bit U, string asm, string int_op, SDPatternOperator ir_op> {
@@ -3285,14 +3296,14 @@ multiclass sve_fp_z2op_p_zd_d<bit U, string asm, string int_op, SDPatternOperato
def _DtoS : sve_fp_z2op_p_zd<{ 0b111100, U }, asm, ZPR64, ZPR32>;
def _DtoD : sve_fp_z2op_p_zd<{ 0b111111, U }, asm, ZPR64, ZPR64>;
- def : SVE_3_Op_UndefZero_Pat<nxv4i32, !cast<SDPatternOperator>(int_op # _i32f64), nxv4i32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2i64, !cast<SDPatternOperator>(int_op # _i64f32), nxv2i64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
- def : SVE_3_Op_UndefZero_Pat<nxv4i32, !cast<SDPatternOperator>(int_op # _i32f16), nxv4i32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2i64, !cast<SDPatternOperator>(int_op # _i64f16), nxv2i64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4i32, !cast<SDPatternOperator>(int_op # _i32f64), nxv4i32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2i64, !cast<SDPatternOperator>(int_op # _i64f32), nxv2i64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4i32, !cast<SDPatternOperator>(int_op # _i32f16), nxv4i32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2i64, !cast<SDPatternOperator>(int_op # _i64f16), nxv2i64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, ir_op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _HtoH)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoS)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, ir_op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoD)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, ir_op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _HtoH)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoS)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, ir_op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoD)>;
}
multiclass sve_fp_z2op_p_zd_c<bit U, string asm> {
@@ -3319,12 +3330,12 @@ multiclass sve_fp_z2op_p_zd_b_0<string asm, string op> {
def _DtoS : sve_fp_z2op_p_zd<0b1101010, asm, ZPR64, ZPR32>;
def _StoD : sve_fp_z2op_p_zd<0b1101011, asm, ZPR32, ZPR64>;
- def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
- def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f64), nxv8f16, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoH)>;
- def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f16), nxv2f64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f64), nxv8f16, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoH)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f16), nxv2f64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
}
//===----------------------------------------------------------------------===//
@@ -4842,6 +4853,16 @@ multiclass sve_int_un_pred_arit<bits<3> opc, string asm,
def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def _B_UNDEF : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _B_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
multiclass sve_int_un_pred_arit_z<bits<3> opc, string asm, SDPatternOperator op> {
@@ -4850,10 +4871,10 @@ multiclass sve_int_un_pred_arit_z<bits<3> opc, string asm, SDPatternOperator op>
def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b0 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b0 }, asm, ZPR64>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_h<bits<3> opc, string asm,
@@ -4967,6 +4988,17 @@ multiclass sve_int_un_pred_arit_bitwise_fp<bits<3> opc, string asm,
def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
multiclass sve_int_un_pred_arit_bitwise_fp_z<bits<3> opc, string asm, SDPatternOperator op> {
@@ -4974,12 +5006,12 @@ multiclass sve_int_un_pred_arit_bitwise_fp_z<bits<3> opc, string asm, SDPatternO
def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b1 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b1 }, asm, ZPR64>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_fp_un_pred_arit_hsd<SDPatternOperator op> {
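The SVE pattern changes in this file (the new SVEAny passthru pattern guarded by an SVEAllActive predicate operand, and the classes turned into multiclasses above) rest on one observation: when the governing predicate is all active, every result lane is produced by the operation, so the passthru value is never read and the zeroing instruction form is safe for any passthru. A scalar model of the lane semantics, illustrative only:

#include <array>
#include <cstddef>

// Lane semantics of a predicated unary op: active lanes take Op(X),
// inactive lanes take the passthru. With an all-active predicate the
// passthru is dead, which is what the new SVEAny patterns exploit.
template <std::size_t N>
std::array<float, N> predUnaryOp(const std::array<bool, N> &Pg,
                                 const std::array<float, N> &X,
                                 const std::array<float, N> &Passthru,
                                 float (*Op)(float)) {
  std::array<float, N> R{};
  for (std::size_t I = 0; I < N; ++I)
    R[I] = Pg[I] ? Op(X[I]) : Passthru[I];
  return R;
}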
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index d83c22e..7767028 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -18,7 +18,7 @@ using namespace llvm;
namespace llvm {
namespace AArch64AT {
-#define GET_AT_IMPL
+#define GET_ATsList_IMPL
#include "AArch64GenSystemOperands.inc"
}
}
@@ -26,128 +26,121 @@ namespace llvm {
namespace llvm {
namespace AArch64DBnXS {
-#define GET_DBNXS_IMPL
+#define GET_DBnXSsList_IMPL
#include "AArch64GenSystemOperands.inc"
}
}
namespace llvm {
namespace AArch64DB {
-#define GET_DB_IMPL
+#define GET_DBsList_IMPL
#include "AArch64GenSystemOperands.inc"
}
}
namespace llvm {
namespace AArch64DC {
-#define GET_DC_IMPL
+#define GET_DCsList_IMPL
#include "AArch64GenSystemOperands.inc"
}
}
namespace llvm {
namespace AArch64IC {
-#define GET_IC_IMPL
+#define GET_ICsList_IMPL
#include "AArch64GenSystemOperands.inc"
}
}
namespace llvm {
namespace AArch64ISB {
-#define GET_ISB_IMPL
+#define GET_ISBsList_IMPL
#include "AArch64GenSystemOperands.inc"
}
}
namespace llvm {
namespace AArch64TSB {
-#define GET_TSB_IMPL
-#include "AArch64GenSystemOperands.inc"
- }
-}
-
-namespace llvm {
- namespace AArch64PRCTX {
-#define GET_PRCTX_IMPL
+#define GET_TSBsList_IMPL
#include "AArch64GenSystemOperands.inc"
}
}
namespace llvm {
namespace AArch64PRFM {
-#define GET_PRFM_IMPL
+#define GET_PRFMsList_IMPL
#include "AArch64GenSystemOperands.inc"
}
}
namespace llvm {
namespace AArch64SVEPRFM {
-#define GET_SVEPRFM_IMPL
+#define GET_SVEPRFMsList_IMPL
#include "AArch64GenSystemOperands.inc"
}
}
namespace llvm {
namespace AArch64RPRFM {
-#define GET_RPRFM_IMPL
+#define GET_RPRFMsList_IMPL
#include "AArch64GenSystemOperands.inc"
} // namespace AArch64RPRFM
} // namespace llvm
namespace llvm {
namespace AArch64SVEPredPattern {
-#define GET_SVEPREDPAT_IMPL
+#define GET_SVEPREDPATsList_IMPL
#include "AArch64GenSystemOperands.inc"
}
}
namespace llvm {
namespace AArch64SVEVecLenSpecifier {
-#define GET_SVEVECLENSPECIFIER_IMPL
+#define GET_SVEVECLENSPECIFIERsList_IMPL
#include "AArch64GenSystemOperands.inc"
} // namespace AArch64SVEVecLenSpecifier
} // namespace llvm
namespace llvm {
namespace AArch64ExactFPImm {
-#define GET_EXACTFPIMM_IMPL
+#define GET_ExactFPImmsList_IMPL
#include "AArch64GenSystemOperands.inc"
}
}
namespace llvm {
namespace AArch64PState {
-#define GET_PSTATEIMM0_15_IMPL
+#define GET_PStateImm0_15sList_IMPL
#include "AArch64GenSystemOperands.inc"
-#define GET_PSTATEIMM0_1_IMPL
+#define GET_PStateImm0_1sList_IMPL
#include "AArch64GenSystemOperands.inc"
}
}
namespace llvm {
namespace AArch64PSBHint {
-#define GET_PSB_IMPL
+#define GET_PSBsList_IMPL
#include "AArch64GenSystemOperands.inc"
}
}
namespace llvm {
namespace AArch64PHint {
-#define GET_PHINT_IMPL
+#define GET_PHintsList_IMPL
#include "AArch64GenSystemOperands.inc"
} // namespace AArch64PHint
} // namespace llvm
namespace llvm {
namespace AArch64BTIHint {
-#define GET_BTI_IMPL
+#define GET_BTIsList_IMPL
#include "AArch64GenSystemOperands.inc"
}
}
namespace llvm {
namespace AArch64SysReg {
-#define GET_SYSREG_IMPL
+#define GET_SysRegsList_IMPL
#include "AArch64GenSystemOperands.inc"
}
}
@@ -194,7 +187,7 @@ namespace llvm {
namespace llvm {
namespace AArch64SVCR {
-#define GET_SVCR_IMPL
+#define GET_SVCRsList_IMPL
#include "AArch64GenSystemOperands.inc"
}
}
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index e0ccba4..b8d3236 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -371,79 +371,89 @@ namespace AArch64SVCR {
struct SVCR : SysAlias{
using SysAlias::SysAlias;
};
- #define GET_SVCR_DECL
- #include "AArch64GenSystemOperands.inc"
+#define GET_SVCRValues_DECL
+#define GET_SVCRsList_DECL
+#include "AArch64GenSystemOperands.inc"
}
namespace AArch64AT{
struct AT : SysAlias {
using SysAlias::SysAlias;
};
- #define GET_AT_DECL
- #include "AArch64GenSystemOperands.inc"
+#define GET_ATValues_DECL
+#define GET_ATsList_DECL
+#include "AArch64GenSystemOperands.inc"
}
namespace AArch64DB {
struct DB : SysAlias {
using SysAlias::SysAlias;
};
- #define GET_DB_DECL
- #include "AArch64GenSystemOperands.inc"
+#define GET_DBValues_DECL
+#define GET_DBsList_DECL
+#include "AArch64GenSystemOperands.inc"
}
namespace AArch64DBnXS {
struct DBnXS : SysAliasImm {
using SysAliasImm::SysAliasImm;
};
- #define GET_DBNXS_DECL
- #include "AArch64GenSystemOperands.inc"
+#define GET_DBnXSValues_DECL
+#define GET_DBnXSsList_DECL
+#include "AArch64GenSystemOperands.inc"
}
namespace AArch64DC {
struct DC : SysAlias {
using SysAlias::SysAlias;
};
- #define GET_DC_DECL
- #include "AArch64GenSystemOperands.inc"
+#define GET_DCValues_DECL
+#define GET_DCsList_DECL
+#include "AArch64GenSystemOperands.inc"
}
namespace AArch64IC {
struct IC : SysAliasReg {
using SysAliasReg::SysAliasReg;
};
- #define GET_IC_DECL
- #include "AArch64GenSystemOperands.inc"
+#define GET_ICValues_DECL
+#define GET_ICsList_DECL
+#include "AArch64GenSystemOperands.inc"
}
namespace AArch64ISB {
struct ISB : SysAlias {
using SysAlias::SysAlias;
};
- #define GET_ISB_DECL
- #include "AArch64GenSystemOperands.inc"
+#define GET_ISBValues_DECL
+#define GET_ISBsList_DECL
+#include "AArch64GenSystemOperands.inc"
}
namespace AArch64TSB {
struct TSB : SysAlias {
using SysAlias::SysAlias;
};
- #define GET_TSB_DECL
- #include "AArch64GenSystemOperands.inc"
+#define GET_TSBValues_DECL
+#define GET_TSBsList_DECL
+#include "AArch64GenSystemOperands.inc"
}
namespace AArch64PRFM {
struct PRFM : SysAlias {
using SysAlias::SysAlias;
};
- #define GET_PRFM_DECL
- #include "AArch64GenSystemOperands.inc"
+#define GET_PRFMValues_DECL
+#define GET_PRFMsList_DECL
+#include "AArch64GenSystemOperands.inc"
}
namespace AArch64SVEPRFM {
struct SVEPRFM : SysAlias {
using SysAlias::SysAlias;
};
-#define GET_SVEPRFM_DECL
+#define GET_SVEPRFMValues_DECL
+#define GET_SVEPRFMsList_DECL
#include "AArch64GenSystemOperands.inc"
}
@@ -451,7 +461,8 @@ namespace AArch64RPRFM {
struct RPRFM : SysAlias {
using SysAlias::SysAlias;
};
-#define GET_RPRFM_DECL
+#define GET_RPRFMValues_DECL
+#define GET_RPRFMsList_DECL
#include "AArch64GenSystemOperands.inc"
} // namespace AArch64RPRFM
@@ -460,7 +471,8 @@ namespace AArch64SVEPredPattern {
const char *Name;
uint16_t Encoding;
};
-#define GET_SVEPREDPAT_DECL
+#define GET_SVEPREDPATValues_DECL
+#define GET_SVEPREDPATsList_DECL
#include "AArch64GenSystemOperands.inc"
}
@@ -469,7 +481,8 @@ namespace AArch64SVEVecLenSpecifier {
const char *Name;
uint16_t Encoding;
};
-#define GET_SVEVECLENSPECIFIER_DECL
+#define GET_SVEVECLENSPECIFIERValues_DECL
+#define GET_SVEVECLENSPECIFIERsList_DECL
#include "AArch64GenSystemOperands.inc"
} // namespace AArch64SVEVecLenSpecifier
@@ -551,12 +564,12 @@ LLVM_DECLARE_ENUM_AS_BITMASK(TailFoldingOpts,
/* LargestValue */ (long)TailFoldingOpts::Reverse);
namespace AArch64ExactFPImm {
- struct ExactFPImm {
- const char *Name;
- int Enum;
- const char *Repr;
- };
-#define GET_EXACTFPIMM_DECL
+struct ExactFPImm {
+ int Enum;
+ const char *Repr;
+};
+#define GET_ExactFPImmValues_DECL
+#define GET_ExactFPImmsList_DECL
#include "AArch64GenSystemOperands.inc"
}
@@ -564,28 +577,30 @@ namespace AArch64PState {
struct PStateImm0_15 : SysAlias{
using SysAlias::SysAlias;
};
- #define GET_PSTATEIMM0_15_DECL
- #include "AArch64GenSystemOperands.inc"
+#define GET_PStateImm0_15Values_DECL
+#define GET_PStateImm0_15sList_DECL
+#include "AArch64GenSystemOperands.inc"
struct PStateImm0_1 : SysAlias{
using SysAlias::SysAlias;
};
- #define GET_PSTATEIMM0_1_DECL
- #include "AArch64GenSystemOperands.inc"
+#define GET_PStateImm0_1Values_DECL
+#define GET_PStateImm0_1sList_DECL
+#include "AArch64GenSystemOperands.inc"
}
namespace AArch64PSBHint {
struct PSB : SysAlias {
using SysAlias::SysAlias;
};
- #define GET_PSB_DECL
- #include "AArch64GenSystemOperands.inc"
+#define GET_PSBValues_DECL
+#define GET_PSBsList_DECL
+#include "AArch64GenSystemOperands.inc"
}
namespace AArch64PHint {
struct PHint {
const char *Name;
- const char *AltName;
unsigned Encoding;
FeatureBitset FeaturesRequired;
@@ -595,7 +610,8 @@ struct PHint {
}
};
-#define GET_PHINT_DECL
+#define GET_PHintValues_DECL
+#define GET_PHintsList_DECL
#include "AArch64GenSystemOperands.inc"
const PHint *lookupPHintByName(StringRef);
@@ -606,8 +622,9 @@ namespace AArch64BTIHint {
struct BTI : SysAlias {
using SysAlias::SysAlias;
};
- #define GET_BTI_DECL
- #include "AArch64GenSystemOperands.inc"
+#define GET_BTIValues_DECL
+#define GET_BTIsList_DECL
+#include "AArch64GenSystemOperands.inc"
}
namespace AArch64SME {
@@ -701,7 +718,6 @@ AArch64StringToVectorLayout(StringRef LayoutStr) {
namespace AArch64SysReg {
struct SysReg {
const char Name[32];
- const char AltName[32];
unsigned Encoding;
bool Readable;
bool Writeable;
@@ -713,11 +729,9 @@ namespace AArch64SysReg {
}
};
- #define GET_SYSREG_DECL
- #include "AArch64GenSystemOperands.inc"
-
- const SysReg *lookupSysRegByName(StringRef);
- const SysReg *lookupSysRegByEncoding(uint16_t);
+#define GET_SysRegsList_DECL
+#define GET_SysRegValues_DECL
+#include "AArch64GenSystemOperands.inc"
uint32_t parseGenericRegister(StringRef Name);
std::string genericRegisterString(uint32_t Bits);
@@ -731,14 +745,6 @@ namespace AArch64TLBI {
#include "AArch64GenSystemOperands.inc"
}
-namespace AArch64PRCTX {
- struct PRCTX : SysAliasReg {
- using SysAliasReg::SysAliasReg;
- };
- #define GET_PRCTX_DECL
- #include "AArch64GenSystemOperands.inc"
-}
-
namespace AArch64II {
/// Target Operand Flag enum.
enum TOF {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index e844904..0f97988 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -1523,7 +1523,8 @@ Value *AMDGPUCodeGenPrepareImpl::shrinkDivRem64(IRBuilder<> &Builder,
bool IsDiv = Opc == Instruction::SDiv || Opc == Instruction::UDiv;
bool IsSigned = Opc == Instruction::SDiv || Opc == Instruction::SRem;
- int NumDivBits = getDivNumBits(I, Num, Den, 32, IsSigned);
+ unsigned BitWidth = Num->getType()->getScalarSizeInBits();
+ int NumDivBits = getDivNumBits(I, Num, Den, BitWidth - 32, IsSigned);
if (NumDivBits == -1)
return nullptr;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 985fa8f..da47aaf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -124,6 +124,16 @@ def sign_extension_in_reg : GICombineRule<
[{ return matchCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }]),
(apply [{ applyCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }])>;
+// Do the following combines:
+// fmul x, select(y, A, B) -> fldexp (x, select i32 (y, a, b))
+// fmul x, select(y, -A, -B) -> fldexp ((fneg x), select i32 (y, a, b))
+def combine_fmul_with_select_to_fldexp : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_FMUL $dst, $x, $select):$root,
+ (G_SELECT $select, $y, $A, $B):$sel,
+ [{ return Helper.matchCombineFmulWithSelectToFldexp(*${root}, *${sel}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
let Predicates = [Has16BitInsts, NotHasMed3_16] in {
// For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. This
@@ -153,13 +163,13 @@ def gfx8_combines : GICombineGroup<[expand_promoted_fmed3]>;
def AMDGPUPreLegalizerCombiner: GICombiner<
"AMDGPUPreLegalizerCombinerImpl",
- [all_combines, clamp_i64_to_i16, foldable_fneg]> {
+ [all_combines, combine_fmul_with_select_to_fldexp, clamp_i64_to_i16, foldable_fneg]> {
let CombineAllMethodName = "tryCombineAllImpl";
}
def AMDGPUPostLegalizerCombiner: GICombiner<
"AMDGPUPostLegalizerCombinerImpl",
- [all_combines, gfx6gfx7_combines, gfx8_combines,
+ [all_combines, gfx6gfx7_combines, gfx8_combines, combine_fmul_with_select_to_fldexp,
uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg,
rcp_sqrt_to_rsq, fdiv_by_sqrt_to_rsq_f16, sign_extension_in_reg, smulu64]> {
let CombineAllMethodName = "tryCombineAllImpl";
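The new combine_fmul_with_select_to_fldexp rule rewrites a multiply by a select of two power-of-two constants into fldexp of a select of their base-2 exponents, with an fneg on x when both constants are negative. A quick host-side check of the identity using the C library's ldexp, purely as an illustration of the arithmetic the combine relies on:

#include <cassert>
#include <cmath>

float fmulSelect(float X, bool Y, float A, float B) { return X * (Y ? A : B); }

float asFldexp(float X, bool Y, int Log2A, int Log2B, bool Neg) {
  float Base = Neg ? -X : X;                 // fneg when A and B are negative
  return std::ldexp(Base, Y ? Log2A : Log2B);
}

int main() {
  // A = -8.0f, B = -0.5f: log2|A| = 3, log2|B| = -1, both negative,
  // so the rewrite uses fldexp(fneg x, select(y, 3, -1)).
  for (bool Y : {false, true})
    assert(fmulSelect(2.5f, Y, -8.0f, -0.5f) ==
           asFldexp(2.5f, Y, 3, -1, /*Neg=*/true));
  return 0;
}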
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
index e5a376a..f6f9f4b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
@@ -17,6 +17,13 @@
using namespace llvm;
using namespace MIPatternMatch;
+AMDGPUCombinerHelper::AMDGPUCombinerHelper(
+ GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize,
+ GISelKnownBits *KB, MachineDominatorTree *MDT, const LegalizerInfo *LI,
+ const GCNSubtarget &STI)
+ : CombinerHelper(Observer, B, IsPreLegalize, KB, MDT, LI), STI(STI),
+ TII(*STI.getInstrInfo()) {}
+
LLVM_READNONE
static bool fnegFoldsIntoMI(const MachineInstr &MI) {
switch (MI.getOpcode()) {
@@ -445,3 +452,67 @@ void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI,
Builder.buildFMinNumIEEE(MI.getOperand(0), B1, C1);
MI.eraseFromParent();
}
+
+bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp(
+ MachineInstr &MI, MachineInstr &Sel,
+ std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FMUL);
+ assert(Sel.getOpcode() == TargetOpcode::G_SELECT);
+ assert(MI.getOperand(2).getReg() == Sel.getOperand(0).getReg());
+
+ Register Dst = MI.getOperand(0).getReg();
+ LLT DestTy = MRI.getType(Dst);
+ LLT ScalarDestTy = DestTy.getScalarType();
+
+ if ((ScalarDestTy != LLT::float64() && ScalarDestTy != LLT::float32() &&
+ ScalarDestTy != LLT::float16()) ||
+ !MRI.hasOneNonDBGUse(Sel.getOperand(0).getReg()))
+ return false;
+
+ Register SelectCondReg = Sel.getOperand(1).getReg();
+ MachineInstr *SelectTrue = MRI.getVRegDef(Sel.getOperand(2).getReg());
+ MachineInstr *SelectFalse = MRI.getVRegDef(Sel.getOperand(3).getReg());
+
+ const auto SelectTrueVal =
+ isConstantOrConstantSplatVectorFP(*SelectTrue, MRI);
+ if (!SelectTrueVal)
+ return false;
+ const auto SelectFalseVal =
+ isConstantOrConstantSplatVectorFP(*SelectFalse, MRI);
+ if (!SelectFalseVal)
+ return false;
+
+ if (SelectTrueVal->isNegative() != SelectFalseVal->isNegative())
+ return false;
+
+ // For f32, only non-inline constants should be transformed.
+ if (ScalarDestTy == LLT::float32() && TII.isInlineConstant(*SelectTrueVal) &&
+ TII.isInlineConstant(*SelectFalseVal))
+ return false;
+
+ int SelectTrueLog2Val = SelectTrueVal->getExactLog2Abs();
+ if (SelectTrueLog2Val == INT_MIN)
+ return false;
+ int SelectFalseLog2Val = SelectFalseVal->getExactLog2Abs();
+ if (SelectFalseLog2Val == INT_MIN)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &Builder) {
+ LLT IntDestTy = DestTy.changeElementType(LLT::scalar(32));
+ auto NewSel = Builder.buildSelect(
+ IntDestTy, SelectCondReg,
+ Builder.buildConstant(IntDestTy, SelectTrueLog2Val),
+ Builder.buildConstant(IntDestTy, SelectFalseLog2Val));
+
+ Register XReg = MI.getOperand(1).getReg();
+ if (SelectTrueVal->isNegative()) {
+ auto NegX =
+ Builder.buildFNeg(DestTy, XReg, MRI.getVRegDef(XReg)->getFlags());
+ Builder.buildFLdexp(Dst, NegX, NewSel, MI.getFlags());
+ } else {
+ Builder.buildFLdexp(Dst, XReg, NewSel, MI.getFlags());
+ }
+ };
+
+ return true;
+}
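
The new matcher rests on the identity that multiplying by an exact power of two is the same as ldexp by its base-2 exponent. A standalone sketch (not part of this patch; values are illustrative) of that identity for both select arms:

```cpp
// Not LLVM code: checks the arithmetic the matcher above relies on --
// x * 2^k == ldexp(x, k), exactly, barring overflow/underflow.
#include <cassert>
#include <cmath>

int main() {
  float X = 3.25f;
  // Hypothetical select arms: both powers of two with matching signs, as
  // the matcher requires; getExactLog2Abs would return 3 and -1 here.
  assert(X * 8.0f == std::ldexp(X, 3));   // true arm:  2^3
  assert(X * 0.5f == std::ldexp(X, -1));  // false arm: 2^-1
  return 0;
}
```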
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
index 6510abe..893b3f5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
@@ -15,13 +15,22 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUCOMBINERHELPER_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUCOMBINERHELPER_H
+#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
namespace llvm {
class AMDGPUCombinerHelper : public CombinerHelper {
+protected:
+ const GCNSubtarget &STI;
+ const SIInstrInfo &TII;
+
public:
using CombinerHelper::CombinerHelper;
+ AMDGPUCombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B,
+ bool IsPreLegalize, GISelKnownBits *KB,
+ MachineDominatorTree *MDT, const LegalizerInfo *LI,
+ const GCNSubtarget &STI);
bool matchFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
void applyFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
@@ -30,6 +39,10 @@ public:
Register Src1, Register Src2);
void applyExpandPromotedF16FMed3(MachineInstr &MI, Register Src0,
Register Src1, Register Src2);
+
+ bool matchCombineFmulWithSelectToFldexp(
+ MachineInstr &MI, MachineInstr &Sel,
+ std::function<void(MachineIRBuilder &)> &MatchInfo);
};
} // namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index d9eaf82..27e9018 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1997,7 +1997,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
return false;
SAddr = SelectSAddrFI(CurDAG, SAddr);
- Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
+ Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 3be865f..041b9b4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1125,8 +1125,9 @@ static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
unsigned FakeS16Opc, unsigned S32Opc,
unsigned S64Opc) {
if (Size == 16)
+    // FIXME-TRUE16: use TrueS16Opc once realtrue16 is supported for CMP code.
return ST.hasTrue16BitInsts()
- ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
+ ? ST.useRealTrue16Insts() ? FakeS16Opc : FakeS16Opc
: S16Opc;
if (Size == 32)
return S32Opc;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index 54d927c..888817e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -134,7 +134,7 @@ AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
: Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
TII(*STI.getInstrInfo()),
- Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
+ Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI, STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
index ff8189c..e1564d5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -94,7 +94,7 @@ AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
: Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
- Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI),
+ Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI, STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index d94c400..08e23cb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1190,9 +1190,13 @@ bool AMDGPURegisterBankInfo::applyMappingDynStackAlloc(
const RegisterBank *SizeBank = getRegBank(AllocSize, MRI, *TRI);
- // TODO: Need to emit a wave reduction to get the maximum size.
- if (SizeBank != &AMDGPU::SGPRRegBank)
- return false;
+ if (SizeBank != &AMDGPU::SGPRRegBank) {
+ auto WaveReduction =
+ B.buildIntrinsic(Intrinsic::amdgcn_wave_reduce_umax, {LLT::scalar(32)})
+ .addUse(AllocSize)
+ .addImm(0);
+ AllocSize = WaveReduction.getReg(0);
+ }
LLT PtrTy = MRI.getType(Dst);
LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index ed956a1..d8f441d 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -9760,10 +9760,14 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
case MCK_SReg_64:
case MCK_SReg_64_XEXEC:
// Null is defined as a 32-bit register but
- // it should also be enabled with 64-bit operands.
- // The following code enables it for SReg_64 operands
+ // it should also be enabled with 64-bit operands or larger.
+ // The following code enables it for SReg_64 and larger operands
// used as source and destination. Remaining source
// operands are handled in isInlinableImm.
+ case MCK_SReg_96:
+ case MCK_SReg_128:
+ case MCK_SReg_256:
+ case MCK_SReg_512:
return Operand.isNull() ? Match_Success : Match_InvalidOperand;
default:
return Match_InvalidOperand;
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index a351f45..f2686bd 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -168,7 +168,7 @@ class getMTBUFInsDA<list<RegisterClass> vdataList,
dag SOffset = !if(hasRestrictedSOffset, (ins SReg_32:$soffset),
(ins SCSrc_b32:$soffset));
- dag NonVaddrInputs = !con((ins SReg_128:$srsrc), SOffset,
+ dag NonVaddrInputs = !con((ins SReg_128_XNULL:$srsrc), SOffset,
(ins Offset:$offset, FORMAT:$format, CPol_0:$cpol, i1imm_0:$swz));
dag Inputs = !if(!empty(vaddrList),
@@ -418,7 +418,7 @@ class getMUBUFInsDA<list<RegisterClass> vdataList,
RegisterOperand vdata_op = getLdStVDataRegisterOperand<vdataClass, isTFE>.ret;
dag SOffset = !if(hasRestrictedSOffset, (ins SReg_32:$soffset), (ins SCSrc_b32:$soffset));
- dag NonVaddrInputs = !con((ins SReg_128:$srsrc), SOffset, (ins Offset:$offset, CPol_0:$cpol, i1imm_0:$swz));
+ dag NonVaddrInputs = !con((ins SReg_128_XNULL:$srsrc), SOffset, (ins Offset:$offset, CPol_0:$cpol, i1imm_0:$swz));
dag Inputs = !if(!empty(vaddrList), NonVaddrInputs, !con((ins vaddrClass:$vaddr), NonVaddrInputs));
dag ret = !if(!empty(vdataList), Inputs, !con((ins vdata_op:$vdata), Inputs));
@@ -680,7 +680,7 @@ multiclass MUBUF_Pseudo_Stores<string opName, ValueType store_vt = i32> {
class MUBUF_Pseudo_Store_Lds<string opName>
: MUBUF_Pseudo<opName,
(outs),
- (ins SReg_128:$srsrc, SCSrc_b32:$soffset, Offset:$offset, CPol:$cpol, i1imm:$swz),
+ (ins SReg_128_XNULL:$srsrc, SCSrc_b32:$soffset, Offset:$offset, CPol:$cpol, i1imm:$swz),
" $srsrc, $soffset$offset lds$cpol"> {
let LGKM_CNT = 1;
let mayLoad = 1;
@@ -703,7 +703,7 @@ class getMUBUFAtomicInsDA<RegisterClass vdataClass, bit vdata_in, bit hasRestric
dag VData = !if(vdata_in, (ins vdata_op:$vdata_in), (ins vdata_op:$vdata));
dag Data = !if(!empty(vaddrList), VData, !con(VData, (ins vaddrClass:$vaddr)));
dag SOffset = !if(hasRestrictedSOffset, (ins SReg_32:$soffset), (ins SCSrc_b32:$soffset));
- dag MainInputs = !con((ins SReg_128:$srsrc), SOffset, (ins Offset:$offset));
+ dag MainInputs = !con((ins SReg_128_XNULL:$srsrc), SOffset, (ins Offset:$offset));
dag CPol = !if(vdata_in, (ins CPol_GLC_WithDefault:$cpol),
(ins CPol_NonGLC_WithDefault:$cpol));
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 5908351..d236327 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -279,7 +279,9 @@ DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
DECODE_OPERAND_REG_7(SReg_64_XEXEC_XNULL, OPW64)
DECODE_OPERAND_REG_7(SReg_96, OPW96)
DECODE_OPERAND_REG_7(SReg_128, OPW128)
+DECODE_OPERAND_REG_7(SReg_128_XNULL, OPW128)
DECODE_OPERAND_REG_7(SReg_256, OPW256)
+DECODE_OPERAND_REG_7(SReg_256_XNULL, OPW256)
DECODE_OPERAND_REG_7(SReg_512, OPW512)
DECODE_OPERAND_REG_8(AGPR_32)
@@ -1692,6 +1694,11 @@ AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
case OPW64:
case OPWV232:
return decodeSpecialReg64(Val);
+ case OPW96:
+ case OPW128:
+ case OPW256:
+ case OPW512:
+ return decodeSpecialReg96Plus(Val);
default:
llvm_unreachable("unexpected immediate type");
}
@@ -1778,6 +1785,24 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
+MCOperand AMDGPUDisassembler::decodeSpecialReg96Plus(unsigned Val) const {
+ using namespace AMDGPU;
+
+ switch (Val) {
+ case 124:
+ if (isGFX11Plus())
+ return createRegOperand(SGPR_NULL);
+ break;
+ case 125:
+ if (!isGFX11Plus())
+ return createRegOperand(SGPR_NULL);
+ break;
+ default:
+ break;
+ }
+ return errOperand(Val, "unknown operand encoding " + Twine(Val));
+}
+
MCOperand
AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val,
unsigned ImmWidth,
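
Per the new decoder above, the scalar null register's encoding is generation-dependent: 124 on GFX11+, 125 on earlier targets that have it. A minimal standalone sketch (not LLVM code; the helper name is illustrative) of that dispatch:

```cpp
// Not LLVM code: mirrors decodeSpecialReg96Plus's null-register check.
#include <optional>

static std::optional<const char *> decodeNull(unsigned Val, bool IsGFX11Plus) {
  if ((IsGFX11Plus && Val == 124) || (!IsGFX11Plus && Val == 125))
    return "sgpr_null";
  return std::nullopt; // unknown operand encoding
}

int main() {
  return (decodeNull(124, true) && !decodeNull(124, false) &&
          decodeNull(125, false) && !decodeNull(125, true)) ? 0 : 1;
}
```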
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index b19e4b7..9a06cc3 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -259,6 +259,7 @@ public:
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const;
MCOperand decodeSpecialReg32(unsigned Val) const;
MCOperand decodeSpecialReg64(unsigned Val) const;
+ MCOperand decodeSpecialReg96Plus(unsigned Val) const;
MCOperand decodeSDWASrc(const OpWidthTy Width, unsigned Val,
unsigned ImmWidth,
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 4722d33..1b94d6c 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -422,7 +422,7 @@ class MIMG_NoSampler_Helper <mimgopc op, string asm,
RegisterClass addr_rc,
string dns="">
: MIMG_gfx6789 <op.GFX10M, (outs dst_rc:$vdata), dns> {
- let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
+ let InOperandList = !con((ins addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -435,7 +435,7 @@ class MIMG_NoSampler_Helper_gfx90a <mimgopc op, string asm,
RegisterClass addr_rc,
string dns="">
: MIMG_gfx90a <op.GFX10M, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> {
- let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
+ let InOperandList = !con((ins addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -447,7 +447,7 @@ class MIMG_NoSampler_gfx10<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
string dns="">
: MIMG_gfx10<op.GFX10M, (outs DataRC:$vdata), dns> {
- let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask,
+ let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256_XNULL:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -460,7 +460,7 @@ class MIMG_NoSampler_nsa_gfx10<mimgopc op, string opcode,
string dns="">
: MIMG_nsa_gfx10<op.GFX10M, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
- (ins SReg_256:$srsrc, DMask:$dmask,
+ (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -472,7 +472,7 @@ class MIMG_NoSampler_gfx11<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
string dns="">
: MIMG_gfx11<op.GFX11, (outs DataRC:$vdata), dns> {
- let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask,
+ let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256_XNULL:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -485,7 +485,7 @@ class MIMG_NoSampler_nsa_gfx11<mimgopc op, string opcode,
string dns="">
: MIMG_nsa_gfx11<op.GFX11, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
- (ins SReg_256:$srsrc, DMask:$dmask,
+ (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -498,7 +498,7 @@ class VIMAGE_NoSampler_gfx12<mimgopc op, string opcode,
string dns="">
: VIMAGE_gfx12<op.GFX12, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
- (ins SReg_256:$rsrc, DMask:$dmask, Dim:$dim,
+ (ins SReg_256_XNULL:$rsrc, DMask:$dmask, Dim:$dim,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, "#AddrAsm#", $rsrc$dmask$dim$cpol$r128$a16$tfe"
@@ -510,8 +510,8 @@ class VSAMPLE_Sampler_gfx12<mimgopc op, string opcode, RegisterClass DataRC,
string dns="">
: VSAMPLE_gfx12<op.GFX12, (outs DataRC:$vdata), num_addrs, dns, Addr3RC> {
let InOperandList = !con(AddrIns,
- (ins SReg_256:$rsrc),
- !if(BaseOpcode.Sampler, (ins SReg_128:$samp), (ins)),
+ (ins SReg_256_XNULL:$rsrc),
+ !if(BaseOpcode.Sampler, (ins SReg_128_XNULL:$samp), (ins)),
(ins DMask:$dmask, Dim:$dim, UNorm:$unorm,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe,
LWE:$lwe),
@@ -527,8 +527,8 @@ class VSAMPLE_Sampler_nortn_gfx12<mimgopc op, string opcode,
string dns="">
: VSAMPLE_gfx12<op.GFX12, (outs), num_addrs, dns, Addr3RC> {
let InOperandList = !con(AddrIns,
- (ins SReg_256:$rsrc),
- !if(BaseOpcode.Sampler, (ins SReg_128:$samp), (ins)),
+ (ins SReg_256_XNULL:$rsrc),
+ !if(BaseOpcode.Sampler, (ins SReg_128_XNULL:$samp), (ins)),
(ins DMask:$dmask, Dim:$dim, UNorm:$unorm,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe,
LWE:$lwe),
@@ -679,7 +679,7 @@ class MIMG_Store_Helper <mimgopc op, string asm,
RegisterClass addr_rc,
string dns = "">
: MIMG_gfx6789<op.GFX10M, (outs), dns> {
- let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
+ let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -693,7 +693,7 @@ class MIMG_Store_Helper_gfx90a <mimgopc op, string asm,
string dns = "">
: MIMG_gfx90a<op.GFX10M, (outs), dns> {
let InOperandList = !con((ins getLdStRegisterOperand<data_rc>.ret:$vdata,
- addr_rc:$vaddr, SReg_256:$srsrc,
+ addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -705,7 +705,7 @@ class MIMG_Store_gfx10<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
string dns="">
: MIMG_gfx10<op.GFX10M, (outs), dns> {
- let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
+ let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -719,7 +719,7 @@ class MIMG_Store_nsa_gfx10<mimgopc op, string opcode,
: MIMG_nsa_gfx10<op.GFX10M, (outs), num_addrs, dns> {
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256:$srsrc, DMask:$dmask,
+ (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -731,7 +731,7 @@ class MIMG_Store_gfx11<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
string dns="">
: MIMG_gfx11<op.GFX11, (outs), dns> {
- let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
+ let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -745,7 +745,7 @@ class MIMG_Store_nsa_gfx11<mimgopc op, string opcode,
: MIMG_nsa_gfx11<op.GFX11, (outs), num_addrs, dns> {
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256:$srsrc, DMask:$dmask,
+ (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -759,7 +759,7 @@ class VIMAGE_Store_gfx12<mimgopc op, string opcode,
: VIMAGE_gfx12<op.GFX12, (outs), num_addrs, dns> {
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256:$rsrc, DMask:$dmask, Dim:$dim,
+ (ins SReg_256_XNULL:$rsrc, DMask:$dmask, Dim:$dim,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, "#AddrAsm#", $rsrc$dmask$dim$cpol$r128$a16$tfe"
@@ -875,7 +875,7 @@ class MIMG_Atomic_gfx6789_base <bits<8> op, string asm, RegisterClass data_rc,
: MIMG_gfx6789 <op, (outs data_rc:$vdst), dns> {
let Constraints = "$vdst = $vdata";
- let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
+ let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da";
@@ -887,7 +887,7 @@ class MIMG_Atomic_gfx90a_base <bits<8> op, string asm, RegisterClass data_rc,
let Constraints = "$vdst = $vdata";
let InOperandList = (ins getLdStRegisterOperand<data_rc>.ret:$vdata,
- addr_rc:$vaddr, SReg_256:$srsrc,
+ addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da);
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da";
@@ -921,7 +921,7 @@ class MIMG_Atomic_gfx10<mimgopc op, string opcode,
!if(enableDisasm, "GFX10", "")> {
let Constraints = "$vdst = $vdata";
- let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
+ let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe);
let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
@@ -936,7 +936,7 @@ class MIMG_Atomic_nsa_gfx10<mimgopc op, string opcode,
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256:$srsrc, DMask:$dmask,
+ (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
@@ -949,7 +949,7 @@ class MIMG_Atomic_gfx11<mimgopc op, string opcode,
!if(enableDisasm, "GFX11", "")> {
let Constraints = "$vdst = $vdata";
- let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
+ let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe);
let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
@@ -964,7 +964,7 @@ class MIMG_Atomic_nsa_gfx11<mimgopc op, string opcode,
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256:$srsrc, DMask:$dmask,
+ (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
@@ -978,7 +978,7 @@ class VIMAGE_Atomic_gfx12<mimgopc op, string opcode, RegisterClass DataRC,
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256:$rsrc, DMask:$dmask, Dim:$dim,
+ (ins SReg_256_XNULL:$rsrc, DMask:$dmask, Dim:$dim,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe));
let AsmString = !if(!empty(renamed), opcode, renamed)#" $vdata, "#AddrAsm#
", $rsrc$dmask$dim$cpol$r128$a16$tfe";
@@ -1128,7 +1128,7 @@ multiclass MIMG_Atomic_Renamed <mimgopc op, string asm, string renamed,
class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,
RegisterClass src_rc, string dns="">
: MIMG_gfx6789 <op.VI, (outs dst_rc:$vdata), dns> {
- let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
+ let InOperandList = !con((ins src_rc:$vaddr, SReg_256_XNULL:$srsrc, SReg_128_XNULL:$ssamp,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -1139,7 +1139,7 @@ class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,
class MIMG_Sampler_gfx90a<mimgopc op, string asm, RegisterClass dst_rc,
RegisterClass src_rc, string dns="">
: MIMG_gfx90a<op.GFX10M, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> {
- let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
+ let InOperandList = !con((ins src_rc:$vaddr, SReg_256_XNULL:$srsrc, SReg_128_XNULL:$ssamp,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -1149,7 +1149,7 @@ class MIMG_Sampler_gfx90a<mimgopc op, string asm, RegisterClass dst_rc,
class MIMG_Sampler_OpList_gfx10p<dag OpPrefix, bit HasD16> {
dag ret = !con(OpPrefix,
- (ins SReg_256:$srsrc, SReg_128:$ssamp,
+ (ins SReg_256_XNULL:$srsrc, SReg_128_XNULL:$ssamp,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(HasD16, (ins D16:$d16), (ins)));
@@ -1524,7 +1524,7 @@ class MIMG_IntersectRay_Helper<bit Is64, bit IsA16> {
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
: MIMG_gfx10<op.GFX10M, (outs VReg_128:$vdata), "GFX10"> {
- let InOperandList = (ins AddrRC:$vaddr0, SReg_128:$srsrc, A16:$a16);
+ let InOperandList = (ins AddrRC:$vaddr0, SReg_128_XNULL:$srsrc, A16:$a16);
let AsmString = opcode#" $vdata, $vaddr0, $srsrc$a16";
let nsa = 0;
@@ -1532,13 +1532,13 @@ class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
class MIMG_IntersectRay_nsa_gfx10<mimgopc op, string opcode, int num_addrs>
: MIMG_nsa_gfx10<op.GFX10M, (outs VReg_128:$vdata), num_addrs, "GFX10"> {
- let InOperandList = !con(nsah.AddrIns, (ins SReg_128:$srsrc, A16:$a16));
+ let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$srsrc, A16:$a16));
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc$a16";
}
class MIMG_IntersectRay_gfx11<mimgopc op, string opcode, RegisterClass AddrRC>
: MIMG_gfx11<op.GFX11, (outs VReg_128:$vdata), "GFX11"> {
- let InOperandList = (ins AddrRC:$vaddr0, SReg_128:$srsrc, A16:$a16);
+ let InOperandList = (ins AddrRC:$vaddr0, SReg_128_XNULL:$srsrc, A16:$a16);
let AsmString = opcode#" $vdata, $vaddr0, $srsrc$a16";
let nsa = 0;
@@ -1548,7 +1548,7 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
list<RegisterClass> addr_types>
: MIMG_nsa_gfx11<op.GFX11, (outs VReg_128:$vdata), num_addrs, "GFX11",
addr_types> {
- let InOperandList = !con(nsah.AddrIns, (ins SReg_128:$srsrc, A16:$a16));
+ let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$srsrc, A16:$a16));
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc$a16";
}
@@ -1556,7 +1556,7 @@ class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs,
list<RegisterClass> addr_types>
: VIMAGE_gfx12<op.GFX12, (outs VReg_128:$vdata),
num_addrs, "GFX12", addr_types> {
- let InOperandList = !con(nsah.AddrIns, (ins SReg_128:$rsrc, A16:$a16));
+ let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$rsrc, A16:$a16));
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $rsrc$a16";
}
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 4fb5cb0..2bc1913 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -199,7 +199,7 @@ static unsigned macToMad(unsigned Opc) {
case AMDGPU::V_FMAC_F16_e64:
return AMDGPU::V_FMA_F16_gfx9_e64;
case AMDGPU::V_FMAC_F16_fake16_e64:
- return AMDGPU::V_FMA_F16_gfx9_e64;
+ return AMDGPU::V_FMA_F16_gfx9_fake16_e64;
case AMDGPU::V_FMAC_LEGACY_F32_e64:
return AMDGPU::V_FMA_LEGACY_F32_e64;
case AMDGPU::V_FMAC_F64_e64:
@@ -1096,21 +1096,8 @@ void SIFoldOperandsImpl::foldOperand(
B.addImm(Defs[I].second);
}
LLVM_DEBUG(dbgs() << "Folded " << *UseMI);
- return;
}
- if (Size != 4)
- return;
-
- Register Reg0 = UseMI->getOperand(0).getReg();
- Register Reg1 = UseMI->getOperand(1).getReg();
- if (TRI->isAGPR(*MRI, Reg0) && TRI->isVGPR(*MRI, Reg1))
- UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64));
- else if (TRI->isVGPR(*MRI, Reg0) && TRI->isAGPR(*MRI, Reg1))
- UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64));
- else if (ST->hasGFX90AInsts() && TRI->isAGPR(*MRI, Reg0) &&
- TRI->isAGPR(*MRI, Reg1))
- UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_MOV_B32));
return;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 58b061f..0ac84f4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4017,29 +4017,26 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
}
// This is similar to the default implementation in ExpandDYNAMIC_STACKALLOC,
-// except for stack growth direction(default: downwards, AMDGPU: upwards) and
-// applying the wave size scale to the increment amount.
-SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(SDValue Op,
- SelectionDAG &DAG) const {
+// except for:
+// 1. Stack growth direction (default: downwards, AMDGPU: upwards), and
+// 2. Size scaling, where scale = wave-reduction(alloca-size) * wave-size.
+SDValue SITargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
const MachineFunction &MF = DAG.getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
SDLoc dl(Op);
EVT VT = Op.getValueType();
- SDValue Tmp1 = Op;
- SDValue Tmp2 = Op.getValue(1);
- SDValue Tmp3 = Op.getOperand(2);
- SDValue Chain = Tmp1.getOperand(0);
-
+ SDValue Chain = Op.getOperand(0);
Register SPReg = Info->getStackPtrOffsetReg();
// Chain the dynamic stack allocation so that it doesn't modify the stack
// pointer when other instructions are using the stack.
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
- SDValue Size = Tmp2.getOperand(1);
+ SDValue Size = Op.getOperand(1);
SDValue BaseAddr = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
- Align Alignment = cast<ConstantSDNode>(Tmp3)->getAlignValue();
+ Align Alignment = cast<ConstantSDNode>(Op.getOperand(2))->getAlignValue();
const TargetFrameLowering *TFL = Subtarget->getFrameLowering();
assert(TFL->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp &&
@@ -4057,30 +4054,36 @@ SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(SDValue Op,
DAG.getSignedConstant(-ScaledAlignment, dl, VT));
}
- SDValue ScaledSize = DAG.getNode(
- ISD::SHL, dl, VT, Size,
- DAG.getConstant(Subtarget->getWavefrontSizeLog2(), dl, MVT::i32));
-
- SDValue NewSP = DAG.getNode(ISD::ADD, dl, VT, BaseAddr, ScaledSize); // Value
+ assert(Size.getValueType() == MVT::i32 && "Size must be 32-bit");
+ SDValue NewSP;
+ if (isa<ConstantSDNode>(Size)) {
+    // For a constant-sized alloca, scale the alloca size by the wave size.
+ SDValue ScaledSize = DAG.getNode(
+ ISD::SHL, dl, VT, Size,
+ DAG.getConstant(Subtarget->getWavefrontSizeLog2(), dl, MVT::i32));
+ NewSP = DAG.getNode(ISD::ADD, dl, VT, BaseAddr, ScaledSize); // Value
+ } else {
+    // For a dynamically sized alloca, do a wave-wide reduction to get the
+    // maximum of the (divergent) alloca sizes, then scale by the wave size.
+ SDValue WaveReduction =
+ DAG.getTargetConstant(Intrinsic::amdgcn_wave_reduce_umax, dl, MVT::i32);
+ Size = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, WaveReduction,
+ Size, DAG.getConstant(0, dl, MVT::i32));
+ SDValue ScaledSize = DAG.getNode(
+ ISD::SHL, dl, VT, Size,
+ DAG.getConstant(Subtarget->getWavefrontSizeLog2(), dl, MVT::i32));
+ NewSP =
+ DAG.getNode(ISD::ADD, dl, VT, BaseAddr, ScaledSize); // Value in vgpr.
+ SDValue ReadFirstLaneID =
+ DAG.getTargetConstant(Intrinsic::amdgcn_readfirstlane, dl, MVT::i32);
+ NewSP = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, ReadFirstLaneID,
+ NewSP);
+ }
Chain = DAG.getCopyToReg(Chain, dl, SPReg, NewSP); // Output chain
- Tmp2 = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);
+ SDValue CallSeqEnd = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);
- return DAG.getMergeValues({BaseAddr, Tmp2}, dl);
-}
-
-SDValue SITargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
- SelectionDAG &DAG) const {
- // We only handle constant sizes here to allow non-entry block, static sized
- // allocas. A truly dynamic value is more difficult to support because we
- // don't know if the size value is uniform or not. If the size isn't uniform,
- // we would need to do a wave reduction to get the maximum size to know how
- // much to increment the uniform stack pointer.
- SDValue Size = Op.getOperand(1);
- if (isa<ConstantSDNode>(Size))
- return lowerDYNAMIC_STACKALLOCImpl(Op, DAG); // Use "generic" expansion.
-
- return AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(Op, DAG);
+ return DAG.getMergeValues({BaseAddr, CallSeqEnd}, dl);
}
SDValue SITargetLowering::LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const {
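
A standalone sketch (not LLVM code; lane sizes are illustrative) of the two points in the rewritten comment: a divergent alloca size is max-reduced across the wave so the uniform stack pointer can be bumped once, and the bump is scaled by the wave size because each lane owns a swizzled slice of the stack:

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

int main() {
  const unsigned WavefrontSizeLog2 = 6;                  // wave64
  std::vector<uint32_t> LaneAllocaSizes = {16, 64, 32};  // divergent sizes
  // wave-reduction(alloca-size): every lane must fit, so take the max.
  uint32_t MaxSize = *std::max_element(LaneAllocaSizes.begin(),
                                       LaneAllocaSizes.end());
  // Scale by wave size: one slice per lane.
  uint32_t Increment = MaxSize << WavefrontSizeLog2;
  return Increment == (64u << 6) ? 0 : 1;
}
```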
@@ -13982,6 +13985,43 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
return Accum;
}
+SDValue
+SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SDValue RHS = N->getOperand(1);
+ auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
+ if (!CRHS)
+ return SDValue();
+
+ // TODO: Worth using computeKnownBits? Maybe expensive since it's so
+ // common.
+ uint64_t Val = CRHS->getZExtValue();
+ if (countr_zero(Val) >= 32) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+ SDValue LHS = N->getOperand(0);
+
+ // Avoid carry machinery if we know the low half of the add does not
+ // contribute to the final result.
+ //
+ // add i64:x, K if computeTrailingZeros(K) >= 32
+ // => build_pair (add x.hi, K.hi), x.lo
+
+ // Breaking the 64-bit add here with this strange constant is unlikely
+ // to interfere with addressing mode patterns.
+
+ SDValue Hi = getHiHalf64(LHS, DAG);
+ SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+ SDValue AddHi =
+ DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32, N->getFlags());
+
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
+ return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi);
+ }
+
+ return SDValue();
+}
+
// Collect the ultimate src of each of the mul node's operands, and confirm
// each operand is 8 bytes.
static std::optional<ByteProvider<SDValue>>
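
A standalone sketch (not LLVM code) checking the premise of foldAddSub64WithZeroLowBitsTo32: when the constant's low 32 bits are zero (countr_zero >= 32), the low half of the sum is unchanged and the high half adds with no carry-in, so the 64-bit add can be split into a 32-bit high-half add plus a build_pair:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0x0000000280000001ull;
  uint64_t K = 0x0000000500000000ull;  // countr_zero(K) == 32
  uint64_t Sum = X + K;
  // Low half passes through untouched.
  assert(static_cast<uint32_t>(Sum) == static_cast<uint32_t>(X));
  // High half is a plain 32-bit add of the high words, no carry machinery.
  assert(static_cast<uint32_t>(Sum >> 32) ==
         static_cast<uint32_t>(X >> 32) + static_cast<uint32_t>(K >> 32));
  return 0;
}
```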
@@ -14258,6 +14298,11 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
return V;
}
+ if (VT == MVT::i64) {
+ if (SDValue Folded = foldAddSub64WithZeroLowBitsTo32(N, DCI))
+ return Folded;
+ }
+
if ((isMul(LHS) || isMul(RHS)) && Subtarget->hasDot7Insts() &&
(Subtarget->hasDot1Insts() || Subtarget->hasDot8Insts())) {
SDValue TempNode(N, 0);
@@ -14443,6 +14488,11 @@ SDValue SITargetLowering::performSubCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
+ if (VT == MVT::i64) {
+ if (SDValue Folded = foldAddSub64WithZeroLowBitsTo32(N, DCI))
+ return Folded;
+ }
+
if (VT != MVT::i32)
return SDValue();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 631f265..299c8f5 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -212,6 +212,9 @@ private:
unsigned getFusedOpcode(const SelectionDAG &DAG,
const SDNode *N0, const SDNode *N1) const;
SDValue tryFoldToMad64_32(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue foldAddSub64WithZeroLowBitsTo32(SDNode *N,
+ DAGCombinerInfo &DCI) const;
+
SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performAddCarrySubCarryCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -421,7 +424,6 @@ public:
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
- SDValue lowerDYNAMIC_STACKALLOCImpl(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index f97ea40..e6f333f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3805,6 +3805,36 @@ static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI,
}
}
+static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc) {
+ switch (Opc) {
+ case AMDGPU::V_MAC_F16_e32:
+ case AMDGPU::V_MAC_F16_e64:
+ return AMDGPU::V_MAD_F16_e64;
+ case AMDGPU::V_MAC_F32_e32:
+ case AMDGPU::V_MAC_F32_e64:
+ return AMDGPU::V_MAD_F32_e64;
+ case AMDGPU::V_MAC_LEGACY_F32_e32:
+ case AMDGPU::V_MAC_LEGACY_F32_e64:
+ return AMDGPU::V_MAD_LEGACY_F32_e64;
+ case AMDGPU::V_FMAC_LEGACY_F32_e32:
+ case AMDGPU::V_FMAC_LEGACY_F32_e64:
+ return AMDGPU::V_FMA_LEGACY_F32_e64;
+ case AMDGPU::V_FMAC_F16_e32:
+ case AMDGPU::V_FMAC_F16_e64:
+ case AMDGPU::V_FMAC_F16_fake16_e64:
+ return ST.hasTrue16BitInsts() ? AMDGPU::V_FMA_F16_gfx9_fake16_e64
+ : AMDGPU::V_FMA_F16_gfx9_e64;
+ case AMDGPU::V_FMAC_F32_e32:
+ case AMDGPU::V_FMAC_F32_e64:
+ return AMDGPU::V_FMA_F32_e64;
+ case AMDGPU::V_FMAC_F64_e32:
+ case AMDGPU::V_FMAC_F64_e64:
+ return AMDGPU::V_FMA_F64_e64;
+ default:
+ llvm_unreachable("invalid instruction");
+ }
+}
+
MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
LiveVariables *LV,
LiveIntervals *LIS) const {
@@ -4040,14 +4070,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
if (Src0Literal && !ST.hasVOP3Literal())
return nullptr;
- unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
- : IsF64 ? AMDGPU::V_FMA_F64_e64
- : IsLegacy
- ? AMDGPU::V_FMA_LEGACY_F32_e64
- : AMDGPU::V_FMA_F32_e64
- : IsF16 ? AMDGPU::V_MAD_F16_e64
- : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
- : AMDGPU::V_MAD_F32_e64;
+ unsigned NewOpc = getNewFMAInst(ST, Opc);
+
if (pseudoToMCOpcode(NewOpc) == -1)
return nullptr;
@@ -6866,9 +6890,8 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
if (RsrcIdx != -1) {
MachineOperand *Rsrc = &MI.getOperand(RsrcIdx);
- if (Rsrc->isReg() && !RI.isSGPRClass(MRI.getRegClass(Rsrc->getReg()))) {
+ if (Rsrc->isReg() && !RI.isSGPRReg(MRI, Rsrc->getReg()))
isRsrcLegal = false;
- }
}
// The operands are legal.
@@ -9294,6 +9317,7 @@ static bool isRenamedInGFX9(int Opcode) {
case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
case AMDGPU::V_FMA_F16_gfx9_e64:
+ case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
case AMDGPU::V_INTERP_P2_F16:
case AMDGPU::V_MAD_F16_e64:
case AMDGPU::V_MAD_U16_e64:
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 789ce88..ee83dff 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2674,8 +2674,8 @@ let OtherPredicates = [NotHasTrue16BitInsts] in {
} // end OtherPredicates = [NotHasTrue16BitInsts]
let OtherPredicates = [HasTrue16BitInsts] in {
- def : FPToI1Pat<V_CMP_EQ_F16_t16_e64, CONST.FP16_ONE, i16, f16, fp_to_uint>;
- def : FPToI1Pat<V_CMP_EQ_F16_t16_e64, CONST.FP16_NEG_ONE, i16, f16, fp_to_sint>;
+ def : FPToI1Pat<V_CMP_EQ_F16_fake16_e64, CONST.FP16_ONE, i16, f16, fp_to_uint>;
+ def : FPToI1Pat<V_CMP_EQ_F16_fake16_e64, CONST.FP16_NEG_ONE, i16, f16, fp_to_sint>;
} // end OtherPredicates = [HasTrue16BitInsts]
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, i32, f32, fp_to_uint>;
@@ -3055,7 +3055,7 @@ def : GCNPat<
(V_BFREV_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0))), sub1)>;
// If fcanonicalize's operand is implicitly canonicalized, we only need a copy.
-let AddedComplexity = 1000 in {
+let AddedComplexity = 8 in {
foreach vt = [f16, v2f16, f32, v2f32, f64] in {
def : GCNPat<
(fcanonicalize (vt is_canonicalized:$src)),
@@ -3710,12 +3710,15 @@ def : IntMinMaxPat<V_MAXMIN_U32_e64, umin, umax_oneuse>;
def : IntMinMaxPat<V_MINMAX_U32_e64, umax, umin_oneuse>;
def : FPMinMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>;
def : FPMinMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>;
-def : FPMinMaxPat<V_MINMAX_F16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
-def : FPMinMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
def : FPMinCanonMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>;
def : FPMinCanonMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>;
-def : FPMinCanonMaxPat<V_MINMAX_F16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
-def : FPMinCanonMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
+}
+
+let True16Predicate = UseFakeTrue16Insts in {
+def : FPMinMaxPat<V_MINMAX_F16_fake16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
+def : FPMinMaxPat<V_MAXMIN_F16_fake16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
+def : FPMinCanonMaxPat<V_MINMAX_F16_fake16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
+def : FPMinCanonMaxPat<V_MAXMIN_F16_fake16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
}
let SubtargetPredicate = isGFX9Plus in {
@@ -3723,6 +3726,10 @@ let True16Predicate = NotHasTrue16BitInsts in {
defm : Int16Med3Pat<V_MED3_I16_e64, smin, smax, VSrc_b16>;
defm : Int16Med3Pat<V_MED3_U16_e64, umin, umax, VSrc_b16>;
}
+let True16Predicate = UseRealTrue16Insts in {
+ defm : Int16Med3Pat<V_MED3_I16_t16_e64, smin, smax, VSrcT_b16>;
+ defm : Int16Med3Pat<V_MED3_U16_t16_e64, umin, umax, VSrcT_b16>;
+}
let True16Predicate = UseFakeTrue16Insts in {
defm : Int16Med3Pat<V_MED3_I16_fake16_e64, smin, smax, VSrc_b16>;
defm : Int16Med3Pat<V_MED3_U16_fake16_e64, umin, umax, VSrc_b16>;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 16a7a9c..7c98ccd 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -153,14 +153,16 @@ class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
}
multiclass SIRegLoHi16 <string n, bits<8> regIdx, bit ArtificialHigh = 1,
- bit isVGPR = 0, bit isAGPR = 0> {
+ bit isVGPR = 0, bit isAGPR = 0,
+ list<int> DwarfEncodings = [-1, -1]> {
def _LO16 : SIReg<n#".l", regIdx, isVGPR, isAGPR>;
def _HI16 : SIReg<!if(ArtificialHigh, "", n#".h"), regIdx, isVGPR, isAGPR,
/* isHi16 */ 1> {
let isArtificial = ArtificialHigh;
}
def "" : RegisterWithSubRegs<n, [!cast<Register>(NAME#"_LO16"),
- !cast<Register>(NAME#"_HI16")]> {
+ !cast<Register>(NAME#"_HI16")]>,
+ DwarfRegNum<DwarfEncodings> {
let Namespace = "AMDGPU";
let SubRegIndices = [lo16, hi16];
let CoveredBySubRegs = !not(ArtificialHigh);
@@ -197,7 +199,8 @@ def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
let HWEncoding = VCC_LO.HWEncoding;
}
-defm EXEC_LO : SIRegLoHi16<"exec_lo", 126>, DwarfRegNum<[1, 1]>;
+defm EXEC_LO : SIRegLoHi16<"exec_lo", 126, /*ArtificialHigh=*/1, /*isVGPR=*/0,
+ /*isAGPR=*/0, /*DwarfEncodings=*/[1, 1]>;
defm EXEC_HI : SIRegLoHi16<"exec_hi", 127>;
def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>, DwarfRegNum<[17, 1]> {
@@ -337,25 +340,26 @@ def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;
// SGPR registers
foreach Index = 0...105 in {
defm SGPR#Index :
- SIRegLoHi16 <"s"#Index, Index>,
- DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)),
- !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]>;
+ SIRegLoHi16 <"s"#Index, Index, /*ArtificialHigh=*/1,
+ /*isVGPR=*/0, /*isAGPR=*/0, /*DwarfEncodings=*/
+ [!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)),
+ !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]>;
}
// VGPR registers
foreach Index = 0...255 in {
defm VGPR#Index :
- SIRegLoHi16 <"v"#Index, Index, /* ArtificialHigh= */ 0,
- /* isVGPR= */ 1, /* isAGPR= */ 0>,
- DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]>;
+ SIRegLoHi16 <"v"#Index, Index, /*ArtificialHigh=*/ 0,
+ /*isVGPR=*/ 1, /*isAGPR=*/ 0, /*DwarfEncodings=*/
+ [!add(Index, 2560), !add(Index, 1536)]>;
}
// AccVGPR registers
foreach Index = 0...255 in {
defm AGPR#Index :
- SIRegLoHi16 <"a"#Index, Index, /* ArtificialHigh= */ 1,
- /* isVGPR= */ 0, /* isAGPR= */ 1>,
- DwarfRegNum<[!add(Index, 3072), !add(Index, 2048)]>;
+ SIRegLoHi16 <"a"#Index, Index, /*ArtificialHigh=*/ 1,
+ /*isVGPR=*/ 0, /*isAGPR=*/ 1, /*DwarfEncodings=*/
+ [!add(Index, 3072), !add(Index, 2048)]>;
}
//===----------------------------------------------------------------------===//
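
The SGPR loop above threads its Dwarf numbers through the new DwarfEncodings parameter with a split formula: s0..s63 map to 32..95, and s64 onward to 1088 and up. A standalone sketch (not LLVM code) of that mapping:

```cpp
#include <cassert>

// Mirrors !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)).
static int sgprDwarfNum(int Index) {
  return Index <= 63 ? Index + 32 : Index + 1024;
}

int main() {
  assert(sgprDwarfNum(0) == 32);
  assert(sgprDwarfNum(63) == 95);
  assert(sgprDwarfNum(64) == 1088);
  assert(sgprDwarfNum(105) == 1129);
  return 0;
}
```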
@@ -809,6 +813,9 @@ def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16,
let BaseClassOrder = 32;
}
+def SGPR_NULL128 : SIReg<"null">;
+def SGPR_NULL256 : SIReg<"null">;
+
let GeneratePressureSet = 0 in {
def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
(add SReg_32, LDS_DIRECT_CLASS)> {
@@ -885,6 +892,7 @@ multiclass SRegClass<int numRegs,
list<ValueType> regTypes,
SIRegisterTuples regList,
SIRegisterTuples ttmpList = regList,
+ bit hasNull = 0,
int copyCost = !sra(!add(numRegs, 1), 1)> {
defvar hasTTMP = !ne(regList, ttmpList);
defvar suffix = !cast<string>(!mul(numRegs, 32));
@@ -901,7 +909,7 @@ multiclass SRegClass<int numRegs,
}
}
- def SReg_ # suffix :
+ def SReg_ # suffix # !if(hasNull, "_XNULL", ""):
SIRegisterClass<"AMDGPU", regTypes, 32,
!con((add !cast<RegisterClass>(sgprName)),
!if(hasTTMP,
@@ -910,15 +918,24 @@ multiclass SRegClass<int numRegs,
let isAllocatable = 0;
let BaseClassOrder = !mul(numRegs, 32);
}
+
+ if hasNull then {
+ def SReg_ # suffix :
+ SIRegisterClass<"AMDGPU", regTypes, 32,
+ (add !cast<RegisterClass>("SReg_" # suffix # "_XNULL"), !cast<Register>("SGPR_NULL" # suffix))> {
+ let isAllocatable = 0;
+ let BaseClassOrder = !mul(numRegs, 32);
+ }
+ }
}
}
defm "" : SRegClass<3, Reg96Types.types, SGPR_96Regs, TTMP_96Regs>;
-defm "" : SRegClass<4, Reg128Types.types, SGPR_128Regs, TTMP_128Regs>;
+defm "" : SRegClass<4, Reg128Types.types, SGPR_128Regs, TTMP_128Regs, /*hasNull*/ true>;
defm "" : SRegClass<5, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>;
defm "" : SRegClass<6, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>;
defm "" : SRegClass<7, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>;
-defm "" : SRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16, v16bf16], SGPR_256Regs, TTMP_256Regs>;
+defm "" : SRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16, v16bf16], SGPR_256Regs, TTMP_256Regs, /*hasNull*/ true>;
defm "" : SRegClass<9, [v9i32, v9f32], SGPR_288Regs, TTMP_288Regs>;
defm "" : SRegClass<10, [v10i32, v10f32], SGPR_320Regs, TTMP_320Regs>;
defm "" : SRegClass<11, [v11i32, v11f32], SGPR_352Regs, TTMP_352Regs>;
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 42df457..979812e 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -455,6 +455,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
break;
case AMDGPU::V_FMA_F16_e64:
case AMDGPU::V_FMA_F16_gfx9_e64:
+ case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
: AMDGPU::V_FMAAK_F16;
break;
@@ -484,6 +485,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
break;
case AMDGPU::V_FMA_F16_e64:
case AMDGPU::V_FMA_F16_gfx9_e64:
+ case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
: AMDGPU::V_FMAMK_F16;
break;
@@ -956,7 +958,8 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
- MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64) {
+ MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
+ MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) {
shrinkMadFma(MI);
continue;
}
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 1aeb4e8..37dcc100 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -332,19 +332,19 @@ defm S_LOAD_I16 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_U16 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
let is_buffer = 1 in {
-defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
// FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on
// SI/CI, but disallowed for SMEM on VI.
-defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_128, SReg_64_XEXEC>;
+defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_128_XNULL, SReg_64_XEXEC>;
let SubtargetPredicate = HasScalarDwordx3Loads in
- defm S_BUFFER_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_128, SReg_96>;
-defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_128, SReg_128>;
-defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_128, SReg_256>;
-defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_128, SReg_512>;
-defm S_BUFFER_LOAD_I8 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_LOAD_U8 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_LOAD_I16 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_LOAD_U16 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
+ defm S_BUFFER_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_128_XNULL, SReg_96>;
+defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_128_XNULL, SReg_128>;
+defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_128_XNULL, SReg_256>;
+defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_128_XNULL, SReg_512>;
+defm S_BUFFER_LOAD_I8 : SM_Pseudo_Loads <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_LOAD_U8 : SM_Pseudo_Loads <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_LOAD_I16 : SM_Pseudo_Loads <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_LOAD_U16 : SM_Pseudo_Loads <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
}
let SubtargetPredicate = HasScalarStores in {
@@ -353,9 +353,9 @@ defm S_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_64, SReg_64_XEXEC>;
defm S_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_64, SReg_128>;
let is_buffer = 1 in {
-defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_128, SReg_128>;
+defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_128_XNULL, SReg_128>;
}
} // End SubtargetPredicate = HasScalarStores
@@ -375,7 +375,7 @@ def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb
defm S_ATC_PROBE : SM_Pseudo_Probe <SReg_64>;
let is_buffer = 1 in {
-defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <SReg_128>;
+defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <SReg_128_XNULL>;
}
} // SubtargetPredicate = isGFX8Plus
@@ -401,33 +401,33 @@ defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_64, SReg_128>;
let SubtargetPredicate = HasScalarAtomics in {
let is_buffer = 1 in {
-defm S_BUFFER_ATOMIC_SWAP : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_ADD : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_SUB : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_SMIN : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_UMIN : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_SMAX : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_UMAX : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_AND : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_OR : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_XOR : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_INC : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_DEC : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-
-defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <SReg_128, SReg_128>;
-defm S_BUFFER_ATOMIC_ADD_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_SUB_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_AND_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_OR_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_INC_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_SWAP : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_ADD : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_SUB : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_SMIN : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_UMIN : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_SMAX : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_UMAX : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_AND : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_OR : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_XOR : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_INC : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_DEC : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+
+defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_128>;
+defm S_BUFFER_ATOMIC_ADD_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_SUB_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_AND_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_OR_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_INC_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
}
defm S_ATOMIC_SWAP : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
@@ -470,7 +470,7 @@ def S_PREFETCH_INST : SM_Prefetch_Pseudo <"s_prefetch_inst", SReg_64, 1>;
def S_PREFETCH_INST_PC_REL : SM_Prefetch_Pseudo <"s_prefetch_inst_pc_rel", SReg_64, 0>;
def S_PREFETCH_DATA : SM_Prefetch_Pseudo <"s_prefetch_data", SReg_64, 1>;
def S_PREFETCH_DATA_PC_REL : SM_Prefetch_Pseudo <"s_prefetch_data_pc_rel", SReg_64, 0>;
-def S_BUFFER_PREFETCH_DATA : SM_Prefetch_Pseudo <"s_buffer_prefetch_data", SReg_128, 1> {
+def S_BUFFER_PREFETCH_DATA : SM_Prefetch_Pseudo <"s_buffer_prefetch_data", SReg_128_XNULL, 1> {
let is_buffer = 1;
}
} // end let SubtargetPredicate = isGFX12Plus
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 1dd39be..b9c73e6 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1018,9 +1018,9 @@ defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03b,
defm V_SWAP_B16 : VOP1Only_Real_gfx11_gfx12<0x066>;
defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12<0x067>;
defm V_MOV_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">;
-defm V_NOT_B16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">;
-defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
-defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
+defm V_NOT_B16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x069, "v_not_b16">;
+defm V_CVT_I32_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
+defm V_CVT_U32_U16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
defm V_CVT_F16_U16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
defm V_CVT_F16_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
@@ -1036,18 +1036,18 @@ defm V_LOG_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16"
defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">;
defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
-defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
+defm V_FREXP_MANT_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
defm V_CEIL_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
-defm V_TRUNC_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05d, "v_trunc_f16">;
-defm V_RNDNE_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">;
-defm V_FRACT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">;
-defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">;
-defm V_COS_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
-defm V_SAT_PK_U8_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
+defm V_TRUNC_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05d, "v_trunc_f16">;
+defm V_RNDNE_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05e, "v_rndne_f16">;
+defm V_FRACT_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05f, "v_fract_f16">;
+defm V_SIN_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x060, "v_sin_f16">;
+defm V_COS_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x061, "v_cos_f16">;
+defm V_SAT_PK_U8_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
defm V_CVT_NORM_I16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
defm V_CVT_NORM_U16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 22e4576..24a2eed 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -340,7 +340,7 @@ let FPDPRounding = 1 in {
let SubtargetPredicate = isGFX9Plus in {
defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst_t16 <"v_div_fixup_f16_gfx9", VOP_F16_F16_F16_F16, AMDGPUdiv_fixup>;
- defm V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, any_fma>;
+ defm V_FMA_F16_gfx9 : VOP3Inst_t16 <"v_fma_f16_gfx9", VOP_F16_F16_F16_F16, any_fma>;
} // End SubtargetPredicate = isGFX9Plus
} // End FPDPRounding = 1
@@ -1374,8 +1374,8 @@ class VOP3_DOT_Profile_fake16<VOPProfile P, VOP3Features Features = VOP3_REGULAR
let SubtargetPredicate = isGFX11Plus in {
defm V_MAXMIN_F32 : VOP3Inst<"v_maxmin_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
defm V_MINMAX_F32 : VOP3Inst<"v_minmax_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
- defm V_MAXMIN_F16 : VOP3Inst<"v_maxmin_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
- defm V_MINMAX_F16 : VOP3Inst<"v_minmax_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
+ defm V_MAXMIN_F16 : VOP3Inst_t16<"v_maxmin_f16", VOP_F16_F16_F16_F16>;
+ defm V_MINMAX_F16 : VOP3Inst_t16<"v_minmax_f16", VOP_F16_F16_F16_F16>;
defm V_MAXMIN_U32 : VOP3Inst<"v_maxmin_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
defm V_MINMAX_U32 : VOP3Inst<"v_minmax_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
defm V_MAXMIN_I32 : VOP3Inst<"v_maxmin_i32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -1578,8 +1578,8 @@ def : MinimumMaximumByMinimum3Maximum3<fmaximum, f32, V_MAXIMUM3_F32_e64>;
defm V_MIN3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x229, "V_MIN3_F32", "v_min3_num_f32">;
defm V_MAX3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x22a, "V_MAX3_F32", "v_max3_num_f32">;
-defm V_MIN3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x22b, "V_MIN3_F16", "v_min3_num_f16">;
-defm V_MAX3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x22c, "V_MAX3_F16", "v_max3_num_f16">;
+defm V_MIN3_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x22b, "v_min3_num_f16", "V_MIN3_F16", "v_min3_f16">;
+defm V_MAX3_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x22c, "v_max3_num_f16", "V_MAX3_F16", "v_max3_f16">;
defm V_MINIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22d>;
defm V_MAXIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22e>;
defm V_MINIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x22f>;
@@ -1588,8 +1588,8 @@ defm V_MED3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x231, "V_MED3_F32",
defm V_MED3_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x232, "v_med3_num_f16", "V_MED3_F16", "v_med3_f16">;
defm V_MINMAX_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x268, "V_MINMAX_F32", "v_minmax_num_f32">;
defm V_MAXMIN_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x269, "V_MAXMIN_F32", "v_maxmin_num_f32">;
-defm V_MINMAX_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26a, "V_MINMAX_F16", "v_minmax_num_f16">;
-defm V_MAXMIN_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26b, "V_MAXMIN_F16", "v_maxmin_num_f16">;
+defm V_MINMAX_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x26a, "v_minmax_num_f16", "V_MINMAX_F16", "v_minmax_f16">;
+defm V_MAXMIN_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x26b, "v_maxmin_num_f16", "V_MAXMIN_F16", "v_maxmin_f16">;
defm V_MINIMUMMAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26c>;
defm V_MAXIMUMMINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26d>;
defm V_MINIMUMMAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x26e>;
@@ -1708,7 +1708,7 @@ defm V_PERM_B32 : VOP3_Realtriple_gfx11_gfx12<0x244>;
defm V_XAD_U32 : VOP3_Realtriple_gfx11_gfx12<0x245>;
defm V_LSHL_ADD_U32 : VOP3_Realtriple_gfx11_gfx12<0x246>;
defm V_ADD_LSHL_U32 : VOP3_Realtriple_gfx11_gfx12<0x247>;
-defm V_FMA_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x248, "V_FMA_F16_gfx9", "v_fma_f16">;
+defm V_FMA_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x248, "v_fma_f16", "V_FMA_F16_gfx9">;
defm V_MIN3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx11<0x249, "v_min3_f16">;
defm V_MIN3_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x24a, "v_min3_i16">;
defm V_MIN3_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x24b, "v_min3_u16">;
@@ -1730,8 +1730,8 @@ defm V_PERMLANE16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25b>;
defm V_PERMLANEX16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25c>;
defm V_MAXMIN_F32 : VOP3_Realtriple_gfx11<0x25e>;
defm V_MINMAX_F32 : VOP3_Realtriple_gfx11<0x25f>;
-defm V_MAXMIN_F16 : VOP3_Realtriple_gfx11<0x260>;
-defm V_MINMAX_F16 : VOP3_Realtriple_gfx11<0x261>;
+defm V_MAXMIN_F16 : VOP3_Realtriple_t16_and_fake16_gfx11<0x260, "v_maxmin_f16">;
+defm V_MINMAX_F16 : VOP3_Realtriple_t16_and_fake16_gfx11<0x261, "v_minmax_f16">;
defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11_gfx12<0x262>;
defm V_MINMAX_U32 : VOP3_Realtriple_gfx11_gfx12<0x263>;
defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11_gfx12<0x264>;
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 9bf043e..8589d59 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -1130,20 +1130,20 @@ defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;
-let OtherPredicates = [HasTrue16BitInsts] in {
-defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_t16_e64, i16>;
-} // End OtherPredicates = [HasTrue16BitInsts]
-
-let OtherPredicates = [NotHasTrue16BitInsts] in {
+let True16Predicate = UseFakeTrue16Insts in {
+defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_fake16_e64, i16>;
+} // End True16Predicate = UseFakeTrue16Insts
+
+let True16Predicate = NotHasTrue16BitInsts in {
defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_e64, i16>;
defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_e64, i16>;
defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_e64, i16>;
@@ -1154,7 +1154,7 @@ defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_e64, i16>;
defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_e64, i16>;
defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_e64, i16>;
defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_e64, i16>;
-} // End OtherPredicates = [NotHasTrue16BitInsts]
+} // End True16Predicate = NotHasTrue16BitInsts
multiclass FCMP_Pattern <PatFrags cond, Instruction inst, ValueType vt> {
let WaveSizePredicate = isWave64 in
@@ -1215,25 +1215,25 @@ defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
-let OtherPredicates = [HasTrue16BitInsts] in {
-defm : FCMP_Pattern <COND_O, V_CMP_O_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_t16_e64, f16>;
-
-defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_t16_e64, f16>;
-} // End OtherPredicates = [HasTrue16BitInsts]
-
-let OtherPredicates = [NotHasTrue16BitInsts] in {
+let True16Predicate = UseFakeTrue16Insts in {
+defm : FCMP_Pattern <COND_O, V_CMP_O_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_fake16_e64, f16>;
+
+defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_fake16_e64, f16>;
+} // End True16Predicate = UseFakeTrue16Insts
+
+let True16Predicate = NotHasTrue16BitInsts in {
defm : FCMP_Pattern <COND_O, V_CMP_O_F16_e64, f16>;
defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_e64, f16>;
defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_e64, f16>;
@@ -1249,7 +1249,7 @@ defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_e64, f16>;
defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_e64, f16>;
defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_e64, f16>;
defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_e64, f16>;
-} // End OtherPredicates = [NotHasTrue16BitInsts]
+} // End True16Predicate = NotHasTrue16BitInsts
//===----------------------------------------------------------------------===//
// DPP Encodings
@@ -1707,23 +1707,6 @@ multiclass VOPCX_Real_t16_gfx11_gfx12<bits<9> op, string asm_name,
VOPCX_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>,
VOPCX_Real_t16<GFX12Gen, op, asm_name, OpName, pseudo_mnemonic>;
-defm V_CMP_F_F16_t16 : VOPC_Real_t16_gfx11<0x000, "v_cmp_f_f16">;
-defm V_CMP_LT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x001, "v_cmp_lt_f16">;
-defm V_CMP_EQ_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x002, "v_cmp_eq_f16">;
-defm V_CMP_LE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x003, "v_cmp_le_f16">;
-defm V_CMP_GT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x004, "v_cmp_gt_f16">;
-defm V_CMP_LG_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x005, "v_cmp_lg_f16">;
-defm V_CMP_GE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x006, "v_cmp_ge_f16">;
-defm V_CMP_O_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x007, "v_cmp_o_f16">;
-defm V_CMP_U_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x008, "v_cmp_u_f16">;
-defm V_CMP_NGE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x009, "v_cmp_nge_f16">;
-defm V_CMP_NLG_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00a, "v_cmp_nlg_f16">;
-defm V_CMP_NGT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00b, "v_cmp_ngt_f16">;
-defm V_CMP_NLE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00c, "v_cmp_nle_f16">;
-defm V_CMP_NEQ_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00d, "v_cmp_neq_f16">;
-defm V_CMP_NLT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00e, "v_cmp_nlt_f16">;
-defm V_CMP_T_F16_t16 : VOPC_Real_t16_gfx11<0x00f, "v_cmp_t_f16", "V_CMP_TRU_F16_t16", "v_cmp_tru_f16">;
-
defm V_CMP_F_F16_fake16 : VOPC_Real_t16_gfx11<0x000, "v_cmp_f_f16">;
defm V_CMP_LT_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x001, "v_cmp_lt_f16">;
defm V_CMP_EQ_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x002, "v_cmp_eq_f16">;
@@ -1759,19 +1742,6 @@ defm V_CMP_NLT_F32 : VOPC_Real_gfx11_gfx12<0x01e>;
defm V_CMP_T_F32 : VOPC_Real_with_name_gfx11<0x01f, "V_CMP_TRU_F32", "v_cmp_t_f32">;
defm V_CMP_T_F64 : VOPC_Real_with_name_gfx11<0x02f, "V_CMP_TRU_F64", "v_cmp_t_f64">;
-defm V_CMP_LT_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x031, "v_cmp_lt_i16">;
-defm V_CMP_EQ_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x032, "v_cmp_eq_i16">;
-defm V_CMP_LE_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x033, "v_cmp_le_i16">;
-defm V_CMP_GT_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x034, "v_cmp_gt_i16">;
-defm V_CMP_NE_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x035, "v_cmp_ne_i16">;
-defm V_CMP_GE_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x036, "v_cmp_ge_i16">;
-defm V_CMP_LT_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x039, "v_cmp_lt_u16">;
-defm V_CMP_EQ_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03a, "v_cmp_eq_u16">;
-defm V_CMP_LE_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03b, "v_cmp_le_u16">;
-defm V_CMP_GT_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03c, "v_cmp_gt_u16">;
-defm V_CMP_NE_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03d, "v_cmp_ne_u16">;
-defm V_CMP_GE_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03e, "v_cmp_ge_u16">;
-
defm V_CMP_LT_I16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x031, "v_cmp_lt_i16">;
defm V_CMP_EQ_I16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x032, "v_cmp_eq_i16">;
defm V_CMP_LE_I16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x033, "v_cmp_le_i16">;
@@ -1819,28 +1789,10 @@ defm V_CMP_NE_U64 : VOPC_Real_gfx11_gfx12<0x05d>;
defm V_CMP_GE_U64 : VOPC_Real_gfx11_gfx12<0x05e>;
defm V_CMP_T_U64 : VOPC_Real_gfx11<0x05f>;
-defm V_CMP_CLASS_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x07d, "v_cmp_class_f16">;
defm V_CMP_CLASS_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x07d, "v_cmp_class_f16">;
defm V_CMP_CLASS_F32 : VOPC_Real_gfx11_gfx12<0x07e>;
defm V_CMP_CLASS_F64 : VOPC_Real_gfx11_gfx12<0x07f>;
-defm V_CMPX_F_F16_t16 : VOPCX_Real_t16_gfx11<0x080, "v_cmpx_f_f16">;
-defm V_CMPX_LT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x081, "v_cmpx_lt_f16">;
-defm V_CMPX_EQ_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x082, "v_cmpx_eq_f16">;
-defm V_CMPX_LE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x083, "v_cmpx_le_f16">;
-defm V_CMPX_GT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x084, "v_cmpx_gt_f16">;
-defm V_CMPX_LG_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x085, "v_cmpx_lg_f16">;
-defm V_CMPX_GE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x086, "v_cmpx_ge_f16">;
-defm V_CMPX_O_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x087, "v_cmpx_o_f16">;
-defm V_CMPX_U_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x088, "v_cmpx_u_f16">;
-defm V_CMPX_NGE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x089, "v_cmpx_nge_f16">;
-defm V_CMPX_NLG_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08a, "v_cmpx_nlg_f16">;
-defm V_CMPX_NGT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08b, "v_cmpx_ngt_f16">;
-defm V_CMPX_NLE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08c, "v_cmpx_nle_f16">;
-defm V_CMPX_NEQ_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08d, "v_cmpx_neq_f16">;
-defm V_CMPX_NLT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08e, "v_cmpx_nlt_f16">;
-defm V_CMPX_T_F16_t16 : VOPCX_Real_with_name_gfx11<0x08f, "V_CMPX_TRU_F16_t16", "v_cmpx_t_f16", "v_cmpx_tru_f16">;
-
defm V_CMPX_F_F16_fake16 : VOPCX_Real_t16_gfx11<0x080, "v_cmpx_f_f16">;
defm V_CMPX_LT_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x081, "v_cmpx_lt_f16">;
defm V_CMPX_EQ_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x082, "v_cmpx_eq_f16">;
@@ -1892,19 +1844,6 @@ defm V_CMPX_NEQ_F64 : VOPCX_Real_gfx11_gfx12<0x0ad>;
defm V_CMPX_NLT_F64 : VOPCX_Real_gfx11_gfx12<0x0ae>;
defm V_CMPX_T_F64 : VOPCX_Real_with_name_gfx11<0x0af, "V_CMPX_TRU_F64", "v_cmpx_t_f64">;
-defm V_CMPX_LT_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b1, "v_cmpx_lt_i16">;
-defm V_CMPX_EQ_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b2, "v_cmpx_eq_i16">;
-defm V_CMPX_LE_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b3, "v_cmpx_le_i16">;
-defm V_CMPX_GT_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b4, "v_cmpx_gt_i16">;
-defm V_CMPX_NE_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b5, "v_cmpx_ne_i16">;
-defm V_CMPX_GE_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b6, "v_cmpx_ge_i16">;
-defm V_CMPX_LT_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b9, "v_cmpx_lt_u16">;
-defm V_CMPX_EQ_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0ba, "v_cmpx_eq_u16">;
-defm V_CMPX_LE_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0bb, "v_cmpx_le_u16">;
-defm V_CMPX_GT_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0bc, "v_cmpx_gt_u16">;
-defm V_CMPX_NE_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0bd, "v_cmpx_ne_u16">;
-defm V_CMPX_GE_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0be, "v_cmpx_ge_u16">;
-
defm V_CMPX_LT_I16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x0b1, "v_cmpx_lt_i16">;
defm V_CMPX_EQ_I16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x0b2, "v_cmpx_eq_i16">;
defm V_CMPX_LE_I16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x0b3, "v_cmpx_le_i16">;
@@ -1951,7 +1890,6 @@ defm V_CMPX_GT_U64 : VOPCX_Real_gfx11_gfx12<0x0dc>;
defm V_CMPX_NE_U64 : VOPCX_Real_gfx11_gfx12<0x0dd>;
defm V_CMPX_GE_U64 : VOPCX_Real_gfx11_gfx12<0x0de>;
defm V_CMPX_T_U64 : VOPCX_Real_gfx11<0x0df>;
-defm V_CMPX_CLASS_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0fd, "v_cmpx_class_f16">;
defm V_CMPX_CLASS_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x0fd, "v_cmpx_class_f16">;
defm V_CMPX_CLASS_F32 : VOPCX_Real_gfx11_gfx12<0x0fe>;
defm V_CMPX_CLASS_F64 : VOPCX_Real_gfx11_gfx12<0x0ff>;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index d236907..930ed9a 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1909,8 +1909,8 @@ multiclass VOP3_Realtriple_t16_gfx11<bits<10> op, string asmName, string opName
multiclass VOP3_Realtriple_t16_and_fake16_gfx11<bits<10> op, string asmName, string opName = NAME,
string pseudo_mnemonic = "", bit isSingle = 0> {
- defm _t16: VOP3_Realtriple_t16_gfx11<op, opName#"_t16", asmName, pseudo_mnemonic, isSingle>;
- defm _fake16: VOP3_Realtriple_t16_gfx11<op, opName#"_fake16", asmName, pseudo_mnemonic, isSingle>;
+ defm _t16: VOP3_Realtriple_t16_gfx11<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
+ defm _fake16: VOP3_Realtriple_t16_gfx11<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
}
multiclass VOP3Only_Realtriple_t16_gfx11<bits<10> op, string asmName,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 5ec2d83..2e517c2 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -806,7 +806,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
} else {
setOperationAction(ISD::BF16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::BF16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_BF16, MVT::f32, Custom);
+ setOperationAction(ISD::FP_TO_BF16, MVT::f64, Custom);
}
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index c67177c..009b60c 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -3320,7 +3320,7 @@ def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
}
-
+let mayStore = 1, hasSideEffects = 0 in {
def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb),
(ins GPR:$Rt, addrmode3_pre:$addr), IndexModePre,
StMiscFrm, IIC_iStore_bh_ru,
@@ -3352,6 +3352,7 @@ def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb),
let Inst{3-0} = offset{3-0}; // imm3_0/Rm
let DecoderMethod = "DecodeAddrMode3Instruction";
}
+} // mayStore = 1, hasSideEffects = 0
let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb),
diff --git a/llvm/lib/Target/ARM/ARMSystemRegister.td b/llvm/lib/Target/ARM/ARMSystemRegister.td
index c03db15..3afc410 100644
--- a/llvm/lib/Target/ARM/ARMSystemRegister.td
+++ b/llvm/lib/Target/ARM/ARMSystemRegister.td
@@ -19,17 +19,13 @@ class MClassSysReg<bits<1> UniqMask1,
bits<1> UniqMask2,
bits<1> UniqMask3,
bits<12> Enc12,
- string name> : SearchableTable {
- let SearchableFields = ["Name", "M1Encoding12", "M2M3Encoding8", "Encoding"];
+ string name> {
string Name;
bits<13> M1Encoding12;
bits<10> M2M3Encoding8;
bits<12> Encoding;
let Name = name;
- let EnumValueField = "M1Encoding12";
- let EnumValueField = "M2M3Encoding8";
- let EnumValueField = "Encoding";
let M1Encoding12{12} = UniqMask1;
let M1Encoding12{11-00} = Enc12;
@@ -41,6 +37,27 @@ class MClassSysReg<bits<1> UniqMask1,
code Requires = [{ {} }];
}
+def MClassSysRegsList : GenericTable {
+ let FilterClass = "MClassSysReg";
+ let Fields = ["Name", "M1Encoding12", "M2M3Encoding8", "Encoding",
+ "Requires"];
+}
+
+def lookupMClassSysRegByName : SearchIndex {
+ let Table = MClassSysRegsList;
+ let Key = ["Name"];
+}
+
+def lookupMClassSysRegByM1Encoding12 : SearchIndex {
+ let Table = MClassSysRegsList;
+ let Key = ["M1Encoding12"];
+}
+
+def lookupMClassSysRegByM2M3Encoding8 : SearchIndex {
+ let Table = MClassSysRegsList;
+ let Key = ["M2M3Encoding8"];
+}
+
// [|i|e|x]apsr_nzcvq has alias [|i|e|x]apsr.
// Mask1 Mask2 Mask3 Enc12, Name
let Requires = [{ {ARM::FeatureDSP} }] in {
@@ -127,15 +144,29 @@ def : MClassSysReg<0, 0, 1, 0x8a7, "pac_key_u_3_ns">;
// Banked Registers
//
-class BankedReg<string name, bits<8> enc>
- : SearchableTable {
+class BankedReg<string name, bits<8> enc> {
string Name;
bits<8> Encoding;
let Name = name;
let Encoding = enc;
- let SearchableFields = ["Name", "Encoding"];
}
+def BankedRegsList : GenericTable {
+ let FilterClass = "BankedReg";
+ let Fields = ["Name", "Encoding"];
+}
+
+def lookupBankedRegByName : SearchIndex {
+ let Table = BankedRegsList;
+ let Key = ["Name"];
+}
+
+def lookupBankedRegByEncoding : SearchIndex {
+ let Table = BankedRegsList;
+ let Key = ["Encoding"];
+}
+
// The values here come from B9.2.3 of the ARM ARM, where bits 4-0 are SysM
// and bit 5 is R.
def : BankedReg<"r8_usr", 0x00>;
diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
index ab3a445..7efd298 100644
--- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
+++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
@@ -752,7 +752,7 @@ Instruction *MVEGatherScatterLowering::tryCreateIncrementingGatScat(
// The gep was in charge of making sure the offsets are scaled correctly
// - calculate that factor so it can be applied by hand
int TypeScale =
- computeScale(DL->getTypeSizeInBits(GEP->getOperand(0)->getType()),
+ computeScale(DL->getTypeSizeInBits(GEP->getSourceElementType()),
DL->getTypeSizeInBits(GEP->getType()) /
cast<FixedVectorType>(GEP->getType())->getNumElements());
if (TypeScale == -1)
diff --git a/llvm/lib/Target/ARM/Utils/ARMBaseInfo.cpp b/llvm/lib/Target/ARM/Utils/ARMBaseInfo.cpp
index 494c67d..e76a70b 100644
--- a/llvm/lib/Target/ARM/Utils/ARMBaseInfo.cpp
+++ b/llvm/lib/Target/ARM/Utils/ARMBaseInfo.cpp
@@ -62,13 +62,13 @@ const MClassSysReg *lookupMClassSysRegBy8bitSYSmValue(unsigned SYSm) {
return ARMSysReg::lookupMClassSysRegByM2M3Encoding8((1<<8)|(SYSm & 0xFF));
}
-#define GET_MCLASSSYSREG_IMPL
+#define GET_MClassSysRegsList_IMPL
#include "ARMGenSystemRegister.inc"
} // end namespace ARMSysReg
namespace ARMBankedReg {
-#define GET_BANKEDREG_IMPL
+#define GET_BankedRegsList_IMPL
#include "ARMGenSystemRegister.inc"
} // end namespace ARMBankedReg
} // end namespace llvm
diff --git a/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h b/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h
index 5562572..dc4f811 100644
--- a/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h
+++ b/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h
@@ -206,8 +206,8 @@ namespace ARMSysReg {
}
};
- #define GET_MCLASSSYSREG_DECL
- #include "ARMGenSystemRegister.inc"
+#define GET_MClassSysRegsList_DECL
+#include "ARMGenSystemRegister.inc"
// lookup system register using 12-bit SYSm value.
// Note: the search is uniqued using M1 mask
@@ -228,8 +228,8 @@ namespace ARMBankedReg {
const char *Name;
uint16_t Encoding;
};
- #define GET_BANKEDREG_DECL
- #include "ARMGenSystemRegister.inc"
+#define GET_BankedRegsList_DECL
+#include "ARMGenSystemRegister.inc"
} // end namespace ARMBankedReg
} // end namespace llvm
diff --git a/llvm/lib/Target/AVR/AVRDevices.td b/llvm/lib/Target/AVR/AVRDevices.td
index 5eca92ab..56147bb 100644
--- a/llvm/lib/Target/AVR/AVRDevices.td
+++ b/llvm/lib/Target/AVR/AVRDevices.td
@@ -60,6 +60,18 @@ def FeatureSmallStack
"The device has an 8-bit "
"stack pointer">;
+// The device potentially requires emitting rjmp that wraps across the flash
+// boundary.
+//
+// We enable this for devices that have exactly 8 kB of flash memory and don't
+// support the `jmp` instruction - with this feature enabled, we try to convert
+// out-of-bounds relative jumps into in-bounds ones by wrapping the offset,
+// e.g. `rjmp +5000` becomes `rjmp -3192`.
+def FeatureWrappingRjmp
+ : SubtargetFeature<"wrappingrjmp", "HasWrappingRjmp", "true",
+ "The device potentially requires emitting rjmp that "
+ "wraps across the flash boundary">;
+
// The device supports the 16-bit GPR pair MOVW instruction.
def FeatureMOVW : SubtargetFeature<"movw", "HasMOVW", "true",
"The device supports the 16-bit MOVW "
@@ -274,11 +286,11 @@ def : Device<"at86rf401", FamilyAVR2, ELFArchAVR25, [FeatureMOVW, FeatureLPMX]>;
def : Device<"at90s4414", FamilyAVR2, ELFArchAVR2, [FeatureSmallStack]>;
def : Device<"at90s4433", FamilyAVR2, ELFArchAVR2, [FeatureSmallStack]>;
def : Device<"at90s4434", FamilyAVR2, ELFArchAVR2, [FeatureSmallStack]>;
-def : Device<"at90s8515", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90c8534", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s8535", FamilyAVR2, ELFArchAVR2>;
-def : Device<"ata5272", FamilyAVR25, ELFArchAVR25>;
-def : Device<"ata6616c", FamilyAVR25, ELFArchAVR25>;
+def : Device<"at90s8515", FamilyAVR2, ELFArchAVR2, [FeatureWrappingRjmp]>;
+def : Device<"at90c8534", FamilyAVR2, ELFArchAVR2, [FeatureWrappingRjmp]>;
+def : Device<"at90s8535", FamilyAVR2, ELFArchAVR2, [FeatureWrappingRjmp]>;
+def : Device<"ata5272", FamilyAVR25, ELFArchAVR25, [FeatureWrappingRjmp]>;
+def : Device<"ata6616c", FamilyAVR25, ELFArchAVR25, [FeatureWrappingRjmp]>;
def : Device<"attiny13", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>;
def : Device<"attiny13a", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>;
def : Device<"attiny2313", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>;
@@ -288,24 +300,24 @@ def : Device<"attiny24a", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>;
def : Device<"attiny4313", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny44", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny44a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny84", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny84a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny84", FamilyAVR25, ELFArchAVR25, [FeatureWrappingRjmp]>;
+def : Device<"attiny84a", FamilyAVR25, ELFArchAVR25, [FeatureWrappingRjmp]>;
def : Device<"attiny25", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>;
def : Device<"attiny45", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny85", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny85", FamilyAVR25, ELFArchAVR25, [FeatureWrappingRjmp]>;
def : Device<"attiny261", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>;
def : Device<"attiny261a", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>;
def : Device<"attiny441", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny461", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny461a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny841", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny861", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny861a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny87", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny841", FamilyAVR25, ELFArchAVR25, [FeatureWrappingRjmp]>;
+def : Device<"attiny861", FamilyAVR25, ELFArchAVR25, [FeatureWrappingRjmp]>;
+def : Device<"attiny861a", FamilyAVR25, ELFArchAVR25, [FeatureWrappingRjmp]>;
+def : Device<"attiny87", FamilyAVR25, ELFArchAVR25, [FeatureWrappingRjmp]>;
def : Device<"attiny43u", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny48", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny88", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny828", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny88", FamilyAVR25, ELFArchAVR25, [FeatureWrappingRjmp]>;
+def : Device<"attiny828", FamilyAVR25, ELFArchAVR25, [FeatureWrappingRjmp]>;
def : Device<"at43usb355", FamilyAVR3, ELFArchAVR3>;
def : Device<"at76c711", FamilyAVR3, ELFArchAVR3>;
def : Device<"atmega103", FamilyAVR31, ELFArchAVR31>;
@@ -321,11 +333,11 @@ def : Device<"atmega16u2", FamilyAVR35, ELFArchAVR35>;
def : Device<"atmega32u2", FamilyAVR35, ELFArchAVR35>;
def : Device<"attiny1634", FamilyAVR35, ELFArchAVR35>;
def : Device<"atmega8", FamilyAVR2, ELFArchAVR4,
- [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM, FeatureWrappingRjmp]>;
def : Device<"ata6289", FamilyAVR4, ELFArchAVR4>;
def : Device<"atmega8a", FamilyAVR2, ELFArchAVR4,
- [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
-def : Device<"ata6285", FamilyAVR4, ELFArchAVR4>;
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM, FeatureWrappingRjmp]>;
+def : Device<"ata6285", FamilyAVR4, ELFArchAVR4, [FeatureWrappingRjmp]>;
def : Device<"ata6286", FamilyAVR4, ELFArchAVR4>;
def : Device<"ata6612c", FamilyAVR4, ELFArchAVR4>;
def : Device<"atmega48", FamilyAVR4, ELFArchAVR4>;
@@ -339,9 +351,9 @@ def : Device<"atmega88p", FamilyAVR4, ELFArchAVR4>;
def : Device<"atmega88pa", FamilyAVR4, ELFArchAVR4>;
def : Device<"atmega88pb", FamilyAVR4, ELFArchAVR4>;
def : Device<"atmega8515", FamilyAVR2, ELFArchAVR4,
- [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM, FeatureWrappingRjmp]>;
def : Device<"atmega8535", FamilyAVR2, ELFArchAVR4,
- [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM, FeatureWrappingRjmp]>;
def : Device<"atmega8hva", FamilyAVR4, ELFArchAVR4>;
def : Device<"at90pwm1", FamilyAVR4, ELFArchAVR4>;
def : Device<"at90pwm2", FamilyAVR4, ELFArchAVR4>;
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index 17c48c2..fd35f8f 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -27,16 +27,13 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-// FIXME: we should be doing checks to make sure asm operands
-// are not out of bounds.
-
namespace adjust {
using namespace llvm;
static void signed_width(unsigned Width, uint64_t Value,
std::string Description, const MCFixup &Fixup,
- MCContext *Ctx = nullptr) {
+ MCContext *Ctx) {
if (!isIntN(Width, Value)) {
std::string Diagnostic = "out of range " + Description;
@@ -46,17 +43,13 @@ static void signed_width(unsigned Width, uint64_t Value,
Diagnostic += " (expected an integer in the range " + std::to_string(Min) +
" to " + std::to_string(Max) + ")";
- if (Ctx) {
- Ctx->reportError(Fixup.getLoc(), Diagnostic);
- } else {
- llvm_unreachable(Diagnostic.c_str());
- }
+ Ctx->reportError(Fixup.getLoc(), Diagnostic);
}
}
static void unsigned_width(unsigned Width, uint64_t Value,
std::string Description, const MCFixup &Fixup,
- MCContext *Ctx = nullptr) {
+ MCContext *Ctx) {
if (!isUIntN(Width, Value)) {
std::string Diagnostic = "out of range " + Description;
@@ -65,17 +58,13 @@ static void unsigned_width(unsigned Width, uint64_t Value,
Diagnostic +=
" (expected an integer in the range 0 to " + std::to_string(Max) + ")";
- if (Ctx) {
- Ctx->reportError(Fixup.getLoc(), Diagnostic);
- } else {
- llvm_unreachable(Diagnostic.c_str());
- }
+ Ctx->reportError(Fixup.getLoc(), Diagnostic);
}
}
/// Adjusts the value of a branch target before fixup application.
static void adjustBranch(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+ MCContext *Ctx) {
// We have one extra bit of precision because the value is rightshifted by
// one.
unsigned_width(Size + 1, Value, std::string("branch target"), Fixup, Ctx);
@@ -86,13 +75,28 @@ static void adjustBranch(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
/// Adjusts the value of a relative branch target before fixup application.
static void adjustRelativeBranch(unsigned Size, const MCFixup &Fixup,
- uint64_t &Value, MCContext *Ctx = nullptr) {
+ uint64_t &Value, MCContext *Ctx) {
// Jumps are relative to the current instruction.
Value -= 2;
// We have one extra bit of precision because the value is rightshifted by
// one.
- signed_width(Size + 1, Value, std::string("branch target"), Fixup, Ctx);
+ Size += 1;
+
+ if (!isIntN(Size, Value) &&
+ Ctx->getSubtargetInfo()->hasFeature(AVR::FeatureWrappingRjmp)) {
+ const int32_t FlashSize = 0x2000;
+ int32_t SignedValue = Value;
+
+ uint64_t WrappedValue = SignedValue > 0 ? (uint64_t)(Value - FlashSize)
+ : (uint64_t)(FlashSize + Value);
+
+ if (isIntN(Size, WrappedValue)) {
+ Value = WrappedValue;
+ }
+ }
+
+ signed_width(Size, Value, std::string("branch target"), Fixup, Ctx);
// Rightshifts the value by one.
AVR::fixups::adjustBranchTarget(Value);
@@ -105,7 +109,7 @@ static void adjustRelativeBranch(unsigned Size, const MCFixup &Fixup,
///
/// Offset of 0 (so the result is left shifted by 3 bits before application).
static void fixup_call(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+ MCContext *Ctx) {
adjustBranch(Size, Fixup, Value, Ctx);
auto top = Value & (0xf00000 << 6); // the top four bits
@@ -121,7 +125,7 @@ static void fixup_call(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
/// 0000 00kk kkkk k000
/// Offset of 0 (so the result is left shifted by 3 bits before application).
static void fixup_7_pcrel(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+ MCContext *Ctx) {
adjustRelativeBranch(Size, Fixup, Value, Ctx);
// Because the value may be negative, we must mask out the sign bits
@@ -135,7 +139,7 @@ static void fixup_7_pcrel(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
/// 0000 kkkk kkkk kkkk
/// Offset of 0 (so the result isn't left-shifted before application).
static void fixup_13_pcrel(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+ MCContext *Ctx) {
adjustRelativeBranch(Size, Fixup, Value, Ctx);
// Because the value may be negative, we must mask out the sign bits
@@ -147,8 +151,7 @@ static void fixup_13_pcrel(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
///
/// Resolves to:
/// 10q0 qq10 0000 1qqq
-static void fixup_6(const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+static void fixup_6(const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx) {
unsigned_width(6, Value, std::string("immediate"), Fixup, Ctx);
Value = ((Value & 0x20) << 8) | ((Value & 0x18) << 7) | (Value & 0x07);
@@ -160,7 +163,7 @@ static void fixup_6(const MCFixup &Fixup, uint64_t &Value,
/// Resolves to:
/// 0000 0000 kk00 kkkk
static void fixup_6_adiw(const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+ MCContext *Ctx) {
unsigned_width(6, Value, std::string("immediate"), Fixup, Ctx);
Value = ((Value & 0x30) << 2) | (Value & 0x0f);
@@ -170,8 +173,7 @@ static void fixup_6_adiw(const MCFixup &Fixup, uint64_t &Value,
///
/// Resolves to:
/// 0000 0000 AAAA A000
-static void fixup_port5(const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+static void fixup_port5(const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx) {
unsigned_width(5, Value, std::string("port number"), Fixup, Ctx);
Value &= 0x1f;
@@ -183,8 +185,7 @@ static void fixup_port5(const MCFixup &Fixup, uint64_t &Value,
///
/// Resolves to:
/// 1011 0AAd dddd AAAA
-static void fixup_port6(const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+static void fixup_port6(const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx) {
unsigned_width(6, Value, std::string("port number"), Fixup, Ctx);
Value = ((Value & 0x30) << 5) | (Value & 0x0f);
@@ -195,7 +196,7 @@ static void fixup_port6(const MCFixup &Fixup, uint64_t &Value,
/// Resolves to:
/// 1010 ikkk dddd kkkk
static void fixup_lds_sts_16(const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+ MCContext *Ctx) {
unsigned_width(7, Value, std::string("immediate"), Fixup, Ctx);
Value = ((Value & 0x70) << 8) | (Value & 0x0f);
}
@@ -213,7 +214,7 @@ namespace ldi {
/// 0000 KKKK 0000 KKKK
/// Offset of 0 (so the result isn't left-shifted before application).
static void fixup(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+ MCContext *Ctx) {
uint64_t upper = Value & 0xf0;
uint64_t lower = Value & 0x0f;
@@ -223,25 +224,25 @@ static void fixup(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
static void neg(uint64_t &Value) { Value *= -1; }
static void lo8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+ MCContext *Ctx) {
Value &= 0xff;
ldi::fixup(Size, Fixup, Value, Ctx);
}
static void hi8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+ MCContext *Ctx) {
Value = (Value & 0xff00) >> 8;
ldi::fixup(Size, Fixup, Value, Ctx);
}
static void hh8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+ MCContext *Ctx) {
Value = (Value & 0xff0000) >> 16;
ldi::fixup(Size, Fixup, Value, Ctx);
}
static void ms8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+ MCContext *Ctx) {
Value = (Value & 0xff000000) >> 24;
ldi::fixup(Size, Fixup, Value, Ctx);
}
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 5d865a3..62b5b70 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -42,8 +42,10 @@ def FloatTy : DXILOpParamType;
def DoubleTy : DXILOpParamType;
def ResRetHalfTy : DXILOpParamType;
def ResRetFloatTy : DXILOpParamType;
+def ResRetDoubleTy : DXILOpParamType;
def ResRetInt16Ty : DXILOpParamType;
def ResRetInt32Ty : DXILOpParamType;
+def ResRetInt64Ty : DXILOpParamType;
def HandleTy : DXILOpParamType;
def ResBindTy : DXILOpParamType;
def ResPropsTy : DXILOpParamType;
@@ -890,6 +892,23 @@ def SplitDouble : DXILOp<102, splitDouble> {
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}
+def RawBufferLoad : DXILOp<139, rawBufferLoad> {
+ let Doc = "reads from a raw buffer and structured buffer";
+ // Handle, Coord0, Coord1, Mask, Alignment
+ let arguments = [HandleTy, Int32Ty, Int32Ty, Int8Ty, Int32Ty];
+ let result = OverloadTy;
+ let overloads = [
+ Overloads<DXIL1_2,
+ [ResRetHalfTy, ResRetFloatTy, ResRetInt16Ty, ResRetInt32Ty]>,
+ Overloads<DXIL1_3,
+ [
+ ResRetHalfTy, ResRetFloatTy, ResRetDoubleTy, ResRetInt16Ty,
+ ResRetInt32Ty, ResRetInt64Ty
+ ]>
+ ];
+ let stages = [Stages<DXIL1_2, [all_stages]>];
+}
+
def Dot4AddI8Packed : DXILOp<163, dot4AddPacked> {
let Doc = "signed dot product of 4 x i8 vectors packed into i32, with "
"accumulate to i32";
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index 5d5bb3e..9f88ccd 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -263,10 +263,14 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
return getResRetType(Type::getHalfTy(Ctx));
case OpParamType::ResRetFloatTy:
return getResRetType(Type::getFloatTy(Ctx));
+ case OpParamType::ResRetDoubleTy:
+ return getResRetType(Type::getDoubleTy(Ctx));
case OpParamType::ResRetInt16Ty:
return getResRetType(Type::getInt16Ty(Ctx));
case OpParamType::ResRetInt32Ty:
return getResRetType(Type::getInt32Ty(Ctx));
+ case OpParamType::ResRetInt64Ty:
+ return getResRetType(Type::getInt64Ty(Ctx));
case OpParamType::HandleTy:
return getHandleType(Ctx);
case OpParamType::ResBindTy:
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 4e01dd1..f43815b 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -415,8 +415,16 @@ public:
}
}
- OldResult = cast<Instruction>(
- IRB.CreateExtractValue(Op, 0, OldResult->getName()));
+ if (OldResult->use_empty()) {
+ // Only the check bit was used, so we're done here.
+ OldResult->eraseFromParent();
+ return Error::success();
+ }
+
+      assert(OldResult->hasOneUse() &&
+             isa<ExtractValueInst>(*OldResult->user_begin()) &&
+             "Expected the only use to be an extract of the first element");
+ OldResult = cast<Instruction>(*OldResult->user_begin());
OldTy = ST->getElementType(0);
}
@@ -534,6 +542,48 @@ public:
});
}
+ [[nodiscard]] bool lowerRawBufferLoad(Function &F) {
+ Triple TT(Triple(M.getTargetTriple()));
+ VersionTuple DXILVersion = TT.getDXILVersion();
+ const DataLayout &DL = F.getDataLayout();
+ IRBuilder<> &IRB = OpBuilder.getIRB();
+ Type *Int8Ty = IRB.getInt8Ty();
+ Type *Int32Ty = IRB.getInt32Ty();
+
+ return replaceFunction(F, [&](CallInst *CI) -> Error {
+ IRB.SetInsertPoint(CI);
+
+ Type *OldTy = cast<StructType>(CI->getType())->getElementType(0);
+ Type *ScalarTy = OldTy->getScalarType();
+ Type *NewRetTy = OpBuilder.getResRetType(ScalarTy);
+
+ Value *Handle =
+ createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
+ Value *Index0 = CI->getArgOperand(1);
+ Value *Index1 = CI->getArgOperand(2);
+ uint64_t NumElements =
+ DL.getTypeSizeInBits(OldTy) / DL.getTypeSizeInBits(ScalarTy);
+ Value *Mask = ConstantInt::get(Int8Ty, ~(~0U << NumElements));
+ Value *Align =
+ ConstantInt::get(Int32Ty, DL.getPrefTypeAlign(ScalarTy).value());
+
+ Expected<CallInst *> OpCall =
+ DXILVersion >= VersionTuple(1, 2)
+ ? OpBuilder.tryCreateOp(OpCode::RawBufferLoad,
+ {Handle, Index0, Index1, Mask, Align},
+ CI->getName(), NewRetTy)
+ : OpBuilder.tryCreateOp(OpCode::BufferLoad,
+ {Handle, Index0, Index1}, CI->getName(),
+ NewRetTy);
+ if (Error E = OpCall.takeError())
+ return E;
+ if (Error E = replaceResRetUses(CI, *OpCall, /*HasCheckBit=*/true))
+ return E;
+
+ return Error::success();
+ });
+ }
+
[[nodiscard]] bool lowerUpdateCounter(Function &F) {
IRBuilder<> &IRB = OpBuilder.getIRB();
Type *Int32Ty = IRB.getInt32Ty();
@@ -723,14 +773,14 @@ public:
HasErrors |= lowerGetPointer(F);
break;
case Intrinsic::dx_resource_load_typedbuffer:
- HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/false);
- break;
- case Intrinsic::dx_resource_loadchecked_typedbuffer:
HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/true);
break;
case Intrinsic::dx_resource_store_typedbuffer:
HasErrors |= lowerTypedBufferStore(F);
break;
+ case Intrinsic::dx_resource_load_rawbuffer:
+ HasErrors |= lowerRawBufferLoad(F);
+ break;
case Intrinsic::dx_resource_updatecounter:
HasErrors |= lowerUpdateCounter(F);
break;
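In lowerRawBufferLoad above, the Mask operand selects how many components the
load returns by setting the low NumElements bits. A standalone sketch of that
bit trick (componentMask is a hypothetical name, not a DXIL helper):

    #include <cassert>
    #include <cstdint>

    // ~0U << N clears the low N bits; the outer ~ turns that into a mask of
    // exactly N ones. Well-defined for 0 <= N < 32; DXIL loads use at most 4.
    uint8_t componentMask(unsigned NumElements) {
      return static_cast<uint8_t>(~(~0U << NumElements));
    }

    int main() {
      assert(componentMask(1) == 0x1); // scalar load
      assert(componentMask(4) == 0xF); // 4-component (e.g. float4) load
      return 0;
    }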
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
index 1ff8f09..8376249 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
@@ -30,6 +30,9 @@ static void replaceTypedBufferAccess(IntrinsicInst *II,
"Unexpected typed buffer type");
Type *ContainedType = HandleType->getTypeParameter(0);
+ Type *LoadType =
+ StructType::get(ContainedType, Type::getInt1Ty(II->getContext()));
+
// We need the size of an element in bytes so that we can calculate the offset
// in elements given a total offset in bytes later.
Type *ScalarType = ContainedType->getScalarType();
@@ -81,13 +84,15 @@ static void replaceTypedBufferAccess(IntrinsicInst *II,
// We're storing a scalar, so we need to load the current value and only
// replace the relevant part.
auto *Load = Builder.CreateIntrinsic(
- ContainedType, Intrinsic::dx_resource_load_typedbuffer,
+ LoadType, Intrinsic::dx_resource_load_typedbuffer,
{II->getOperand(0), II->getOperand(1)});
+ auto *Struct = Builder.CreateExtractValue(Load, {0});
+
// If we have an offset from seeing a GEP earlier, use it.
Value *IndexOp = Current.Index
? Current.Index
: ConstantInt::get(Builder.getInt32Ty(), 0);
- V = Builder.CreateInsertElement(Load, V, IndexOp);
+ V = Builder.CreateInsertElement(Struct, V, IndexOp);
} else {
llvm_unreachable("Store to typed resource has invalid type");
}
@@ -101,8 +106,10 @@ static void replaceTypedBufferAccess(IntrinsicInst *II,
} else if (auto *LI = dyn_cast<LoadInst>(Current.Access)) {
IRBuilder<> Builder(LI);
Value *V = Builder.CreateIntrinsic(
- ContainedType, Intrinsic::dx_resource_load_typedbuffer,
+ LoadType, Intrinsic::dx_resource_load_typedbuffer,
{II->getOperand(0), II->getOperand(1)});
+ V = Builder.CreateExtractValue(V, {0});
+
if (Current.Index)
V = Builder.CreateExtractElement(V, Current.Index);
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
index 45aadac..be68d46 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
@@ -749,8 +749,8 @@ uint64_t DXILBitcodeWriter::getOptimizationFlags(const Value *V) {
if (PEO->isExact())
Flags |= 1 << bitc::PEO_EXACT;
} else if (const auto *FPMO = dyn_cast<FPMathOperator>(V)) {
- if (FPMO->hasAllowReassoc())
- Flags |= bitc::AllowReassoc;
+ if (FPMO->hasAllowReassoc() || FPMO->hasAllowContract())
+ Flags |= bitc::UnsafeAlgebra;
if (FPMO->hasNoNaNs())
Flags |= bitc::NoNaNs;
if (FPMO->hasNoInfs())
@@ -759,10 +759,6 @@ uint64_t DXILBitcodeWriter::getOptimizationFlags(const Value *V) {
Flags |= bitc::NoSignedZeros;
if (FPMO->hasAllowReciprocal())
Flags |= bitc::AllowReciprocal;
- if (FPMO->hasAllowContract())
- Flags |= bitc::AllowContract;
- if (FPMO->hasApproxFunc())
- Flags |= bitc::ApproxFunc;
}
return Flags;
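The change above folds the modern reassoc/contract fast-math flags back onto a
single legacy bit: DXIL is pinned to LLVM 3.7's bitcode, whose fast-math record
predates the reassoc/contract split. A hedged sketch of the mapping (the enum
values are illustrative placeholders, not the real bitc:: constants):

    #include <cassert>
    #include <cstdint>

    enum LegacyFMF : uint64_t { UnsafeAlgebra = 1 << 0, NoNaNs = 1 << 1 };

    // Either of the modern flags collapses onto UnsafeAlgebra when writing
    // the legacy record; flags the old format never had are simply dropped.
    uint64_t legacyFlags(bool Reassoc, bool Contract, bool NNan) {
      uint64_t Flags = 0;
      if (Reassoc || Contract)
        Flags |= UnsafeAlgebra;
      if (NNan)
        Flags |= NoNaNs;
      return Flags;
    }

    int main() {
      assert(legacyFlags(/*Reassoc=*/false, /*Contract=*/true, false) ==
             UnsafeAlgebra);
      return 0;
    }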
diff --git a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index 46a8ab3..991ee5b 100644
--- a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -1796,6 +1796,8 @@ bool PolynomialMultiplyRecognize::recognize() {
IterCount = CV->getValue()->getZExtValue() + 1;
Value *CIV = getCountIV(LoopB);
+ if (CIV == nullptr)
+ return false;
ParsedValues PV;
Simplifier PreSimp;
PV.IterCount = IterCount;
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
index 30742c7..0218934 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
@@ -352,11 +352,13 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLE(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
// Code Sequence:
+ // lu12i.w $rd, %le_hi20_r(sym)
+ // add.w/d $rd, $rd, $tp, %le_add_r(sym)
+ // addi.w/d $rd, $rd, %le_lo12_r(sym)
+ //
+  // Code Sequence when using the large code model:
// lu12i.w $rd, %le_hi20(sym)
// ori $rd, $rd, %le_lo12(sym)
- //
- // And additionally if generating code using the large code model:
- //
// lu32i.d $rd, %le64_lo20(sym)
// lu52i.d $rd, $rd, %le64_hi12(sym)
MachineFunction *MF = MBB.getParent();
@@ -366,20 +368,35 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLE(
bool Large = MF->getTarget().getCodeModel() == CodeModel::Large;
Register DestReg = MI.getOperand(0).getReg();
Register Parts01 =
- Large ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass)
- : DestReg;
+ MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
Register Part1 =
MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
MachineOperand &Symbol = MI.getOperand(1);
- BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU12I_W), Part1)
- .addDisp(Symbol, 0, LoongArchII::MO_LE_HI);
+ if (!Large) {
+ BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU12I_W), Part1)
+ .addDisp(Symbol, 0, LoongArchII::MO_LE_HI_R);
- BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ORI), Parts01)
- .addReg(Part1, RegState::Kill)
- .addDisp(Symbol, 0, LoongArchII::MO_LE_LO);
+ const auto &STI = MF->getSubtarget<LoongArchSubtarget>();
+ unsigned AddOp = STI.is64Bit() ? LoongArch::PseudoAddTPRel_D
+ : LoongArch::PseudoAddTPRel_W;
+ BuildMI(MBB, MBBI, DL, TII->get(AddOp), Parts01)
+ .addReg(Part1, RegState::Kill)
+ .addReg(LoongArch::R2)
+ .addDisp(Symbol, 0, LoongArchII::MO_LE_ADD_R);
+
+ unsigned AddiOp = STI.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
+ BuildMI(MBB, MBBI, DL, TII->get(AddiOp), DestReg)
+ .addReg(Parts01, RegState::Kill)
+ .addDisp(Symbol, 0, LoongArchII::MO_LE_LO_R);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU12I_W), Part1)
+ .addDisp(Symbol, 0, LoongArchII::MO_LE_HI);
+
+ BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ORI), Parts01)
+ .addReg(Part1, RegState::Kill)
+ .addDisp(Symbol, 0, LoongArchII::MO_LE_LO);
- if (Large) {
Register Parts012 =
MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
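For the non-large path added above, the %le_hi20_r/%le_lo12_r pair splits the
tp-relative offset into a lu12i.w immediate and a signed 12-bit addend. A
plain-arithmetic sketch of that split, assuming the usual +0x800 rounding used
when the low part is signed (relocation math only, not backend code):

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    int main() {
      for (int64_t Offset : {int64_t(0x12345), int64_t(0x12fff)}) {
        int64_t Hi20 = (Offset + 0x800) >> 12; // lu12i.w immediate
        int64_t Lo12 = Offset - (Hi20 << 12);  // addi.w/d immediate (signed)
        assert(Lo12 >= -2048 && Lo12 < 2048);
        assert((Hi20 << 12) + Lo12 == Offset);
      }
      return 0;
    }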
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 7f67def..96e6f71 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1866,9 +1866,17 @@ SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
// PseudoLA_*_LARGE nodes.
SDValue Tmp = DAG.getConstant(0, DL, Ty);
SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
- SDValue Offset = Large
+
+  // Only IE needs an extra argument under the large code model.
+ SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
: SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
+
+  // If this is LE under the normal/medium code model, the tp-relative add
+  // happens later, during pseudo-instruction expansion.
+ if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
+ return Offset;
+
if (UseGOT) {
// Mark the load instruction as invariant to enable hoisting in MachineLICM.
MachineFunction &MF = DAG.getMachineFunction();
@@ -1989,7 +1997,7 @@ LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
//
// This node doesn't need an extra argument for the large code model.
return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
- /*UseGOT=*/false);
+ /*UseGOT=*/false, Large);
}
return getTLSDescAddr(N, DAG,
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index 363cacf..32bc8bb 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -154,6 +154,9 @@ void LoongArchInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
Register VReg) const {
MachineFunction *MF = MBB.getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
+ DebugLoc DL;
+ if (I != MBB.end())
+ DL = I->getDebugLoc();
unsigned Opcode;
if (LoongArch::GPRRegClass.hasSubClassEq(RC))
@@ -177,7 +180,7 @@ void LoongArchInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
- BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
+ BuildMI(MBB, I, DL, get(Opcode), DstReg)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
@@ -406,6 +409,11 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
// lu32i.d $a1, %ie64_pc_lo20(s)
// lu52i.d $a1, $a1, %ie64_pc_hi12(s)
//
+ // * pcalau12i $a0, %desc_pc_hi20(s)
+ // addi.d $a1, $zero, %desc_pc_lo12(s)
+ // lu32i.d $a1, %desc64_pc_lo20(s)
+ // lu52i.d $a1, $a1, %desc64_pc_hi12(s)
+ //
// For simplicity, only pcalau12i and lu52i.d are marked as scheduling
// boundaries, and the instructions between them are guaranteed to be
// ordered according to data dependencies.
@@ -430,12 +438,16 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
if (MO0 == LoongArchII::MO_IE_PC_HI && MO1 == LoongArchII::MO_IE_PC_LO &&
MO2 == LoongArchII::MO_IE_PC64_LO)
return true;
+ if (MO0 == LoongArchII::MO_DESC_PC_HI &&
+ MO1 == LoongArchII::MO_DESC_PC_LO &&
+ MO2 == LoongArchII::MO_DESC64_PC_LO)
+ return true;
break;
}
case LoongArch::LU52I_D: {
auto MO = MI.getOperand(2).getTargetFlags();
if (MO == LoongArchII::MO_PCREL64_HI || MO == LoongArchII::MO_GOT_PC64_HI ||
- MO == LoongArchII::MO_IE_PC64_HI)
+ MO == LoongArchII::MO_IE_PC64_HI || MO == LoongArchII::MO_DESC64_PC_HI)
return true;
break;
}
@@ -651,7 +663,10 @@ LoongArchInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
{MO_DESC_LD, "loongarch-desc-ld"},
{MO_DESC_CALL, "loongarch-desc-call"},
{MO_LD_PC_HI, "loongarch-ld-pc-hi"},
- {MO_GD_PC_HI, "loongarch-gd-pc-hi"}};
+ {MO_GD_PC_HI, "loongarch-gd-pc-hi"},
+ {MO_LE_HI_R, "loongarch-le-hi-r"},
+ {MO_LE_ADD_R, "loongarch-le-add-r"},
+ {MO_LE_LO_R, "loongarch-le-lo-r"}};
return ArrayRef(TargetFlags);
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
index 2bacc12..d1de060 100644
--- a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
@@ -114,6 +114,15 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym,
case LoongArchII::MO_DESC_CALL:
Kind = LoongArchMCExpr::VK_LoongArch_TLS_DESC_CALL;
break;
+ case LoongArchII::MO_LE_HI_R:
+ Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE_HI20_R;
+ break;
+ case LoongArchII::MO_LE_ADD_R:
+ Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE_ADD_R;
+ break;
+ case LoongArchII::MO_LE_LO_R:
+ Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE_LO12_R;
+ break;
// TODO: Handle more target-flags.
}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
index bd63c5e..2369904 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
@@ -54,6 +54,9 @@ enum {
MO_DESC64_PC_LO,
MO_DESC_LD,
MO_DESC_CALL,
+ MO_LE_HI_R,
+ MO_LE_ADD_R,
+ MO_LE_LO_R,
// TODO: Add more flags.
};
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index 65e1893..d34f45f 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -14,7 +14,7 @@
#include "NVPTX.h"
#include "NVPTXUtilities.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/IR/NVVMIntrinsicFlags.h"
+#include "llvm/IR/NVVMIntrinsicUtils.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
diff --git a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
index f940dc0..c03ef8d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
@@ -14,6 +14,7 @@
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
@@ -49,39 +50,34 @@ static std::string getHash(StringRef Str) {
return llvm::utohexstr(Hash.low(), /*LowerCase=*/true);
}
-static void addKernelMetadata(Module &M, GlobalValue *GV) {
+static void addKernelMetadata(Module &M, Function *F) {
llvm::LLVMContext &Ctx = M.getContext();
// Get "nvvm.annotations" metadata node.
llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
- llvm::Metadata *KernelMDVals[] = {
- llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "kernel"),
- llvm::ConstantAsMetadata::get(
- llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
-
// This kernel is only to be called single-threaded.
llvm::Metadata *ThreadXMDVals[] = {
- llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidx"),
+ llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "maxntidx"),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
llvm::Metadata *ThreadYMDVals[] = {
- llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidy"),
+ llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "maxntidy"),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
llvm::Metadata *ThreadZMDVals[] = {
- llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidz"),
+ llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "maxntidz"),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
llvm::Metadata *BlockMDVals[] = {
- llvm::ConstantAsMetadata::get(GV),
+ llvm::ConstantAsMetadata::get(F),
llvm::MDString::get(Ctx, "maxclusterrank"),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
// Append metadata to nvvm.annotations.
- MD->addOperand(llvm::MDNode::get(Ctx, KernelMDVals));
+ F->setCallingConv(CallingConv::PTX_Kernel);
MD->addOperand(llvm::MDNode::get(Ctx, ThreadXMDVals));
MD->addOperand(llvm::MDNode::get(Ctx, ThreadYMDVals));
MD->addOperand(llvm::MDNode::get(Ctx, ThreadZMDVals));
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index c51729e..ef97844 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -14,10 +14,11 @@
#include "NVPTXUtilities.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
-#include "llvm/IR/NVVMIntrinsicFlags.h"
+#include "llvm/IR/NVVMIntrinsicUtils.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -2449,6 +2450,11 @@ bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
return true;
}
+static inline bool isAddLike(const SDValue V) {
+ return V.getOpcode() == ISD::ADD ||
+ (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());
+}
+
// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a globaladdress or externalsymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
@@ -2475,7 +2481,7 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
// symbol+offset
bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
- if (Addr.getOpcode() == ISD::ADD) {
+ if (isAddLike(Addr)) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
SDValue base = Addr.getOperand(0);
if (SelectDirectAddr(base, Base)) {
@@ -2512,7 +2518,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(
Addr.getOpcode() == ISD::TargetGlobalAddress)
return false; // direct calls.
- if (Addr.getOpcode() == ISD::ADD) {
+ if (isAddLike(Addr)) {
if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
return false;
}
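The new isAddLike helper also accepts ISD::OR nodes carrying the disjoint flag: when the operands share no set bits, or and add produce the same value, so a symbol|offset computation can still be split into base + offset. A standalone sanity check of that identity (plain C++, independent of LLVM):

  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t Base = 0xFFFF0000, Offset = 0x1234; // no common set bits
    assert((Base | Offset) == (Base + Offset));  // "or disjoint" behaves as add
    return 0;
  }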
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 4a98fe2..c9b7e87 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -261,6 +261,9 @@ public:
return true;
}
+ bool isFAbsFree(EVT VT) const override { return true; }
+ bool isFNegFree(EVT VT) const override { return true; }
+
private:
const NVPTXSubtarget &STI; // cache the subtarget here
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 711cd67..c3e72d6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -733,12 +733,12 @@ def fpround_oneuse : PatFrag<(ops node:$a), (fpround node:$a), [{
def : Pat<(v2bf16 (build_vector (bf16 (fpround_oneuse f32:$lo)),
(bf16 (fpround_oneuse f32:$hi)))),
- (CVT_bf16x2_f32 Float32Regs:$hi, Float32Regs:$lo, CvtRN)>,
+ (CVT_bf16x2_f32 $hi, $lo, CvtRN)>,
Requires<[hasPTX<70>, hasSM<80>, hasBF16Math]>;
def : Pat<(v2f16 (build_vector (f16 (fpround_oneuse f32:$lo)),
(f16 (fpround_oneuse f32:$hi)))),
- (CVT_f16x2_f32 Float32Regs:$hi, Float32Regs:$lo, CvtRN)>,
+ (CVT_f16x2_f32 $hi, $lo, CvtRN)>,
Requires<[hasPTX<70>, hasSM<80>, useFP16Math]>;
//-----------------------------------
@@ -813,7 +813,7 @@ defm SELP_f64 : SELP_PATTERN<"f64", f64, Float64Regs, f64imm, fpimm>;
foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
def : Pat<(vt (select i1:$p, vt:$a, vt:$b)),
- (SELP_b32rr Int32Regs:$a, Int32Regs:$b, Int1Regs:$p)>;
+ (SELP_b32rr $a, $b, $p)>;
}
//-----------------------------------
@@ -952,29 +952,29 @@ def mul_wide_unsigned : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>;
// Matchers for signed, unsigned mul.wide ISD nodes.
def : Pat<(i32 (mul_wide_signed i16:$a, i16:$b)),
- (MULWIDES32 i16:$a, i16:$b)>,
+ (MULWIDES32 $a, $b)>,
Requires<[doMulWide]>;
def : Pat<(i32 (mul_wide_signed i16:$a, imm:$b)),
- (MULWIDES32Imm Int16Regs:$a, imm:$b)>,
+ (MULWIDES32Imm $a, imm:$b)>,
Requires<[doMulWide]>;
def : Pat<(i32 (mul_wide_unsigned i16:$a, i16:$b)),
- (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
+ (MULWIDEU32 $a, $b)>,
Requires<[doMulWide]>;
def : Pat<(i32 (mul_wide_unsigned i16:$a, imm:$b)),
- (MULWIDEU32Imm Int16Regs:$a, imm:$b)>,
+ (MULWIDEU32Imm $a, imm:$b)>,
Requires<[doMulWide]>;
def : Pat<(i64 (mul_wide_signed i32:$a, i32:$b)),
- (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
+ (MULWIDES64 $a, $b)>,
Requires<[doMulWide]>;
def : Pat<(i64 (mul_wide_signed i32:$a, imm:$b)),
- (MULWIDES64Imm Int32Regs:$a, imm:$b)>,
+ (MULWIDES64Imm $a, imm:$b)>,
Requires<[doMulWide]>;
def : Pat<(i64 (mul_wide_unsigned i32:$a, i32:$b)),
- (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
+ (MULWIDEU64 $a, $b)>,
Requires<[doMulWide]>;
def : Pat<(i64 (mul_wide_unsigned i32:$a, imm:$b)),
- (MULWIDEU64Imm Int32Regs:$a, imm:$b)>,
+ (MULWIDEU64Imm $a, imm:$b)>,
Requires<[doMulWide]>;
// Predicates used for converting some patterns to mul.wide.
@@ -1024,46 +1024,46 @@ def SHL2MUL16 : SDNodeXForm<imm, [{
// Convert "sign/zero-extend, then shift left by an immediate" to mul.wide.
def : Pat<(shl (sext i32:$a), (i32 IntConst_0_30:$b)),
- (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
+ (MULWIDES64Imm $a, (SHL2MUL32 $b))>,
Requires<[doMulWide]>;
def : Pat<(shl (zext i32:$a), (i32 IntConst_0_30:$b)),
- (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
+ (MULWIDEU64Imm $a, (SHL2MUL32 $b))>,
Requires<[doMulWide]>;
def : Pat<(shl (sext i16:$a), (i16 IntConst_0_14:$b)),
- (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
+ (MULWIDES32Imm $a, (SHL2MUL16 $b))>,
Requires<[doMulWide]>;
def : Pat<(shl (zext i16:$a), (i16 IntConst_0_14:$b)),
- (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
+ (MULWIDEU32Imm $a, (SHL2MUL16 $b))>,
Requires<[doMulWide]>;
// Convert "sign/zero-extend then multiply" to mul.wide.
def : Pat<(mul (sext i32:$a), (sext i32:$b)),
- (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
+ (MULWIDES64 $a, $b)>,
Requires<[doMulWide]>;
def : Pat<(mul (sext i32:$a), (i64 SInt32Const:$b)),
- (MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>,
+ (MULWIDES64Imm64 $a, (i64 SInt32Const:$b))>,
Requires<[doMulWide]>;
def : Pat<(mul (zext i32:$a), (zext i32:$b)),
- (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
+ (MULWIDEU64 $a, $b)>,
Requires<[doMulWide]>;
def : Pat<(mul (zext i32:$a), (i64 UInt32Const:$b)),
- (MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>,
+ (MULWIDEU64Imm64 $a, (i64 UInt32Const:$b))>,
Requires<[doMulWide]>;
def : Pat<(mul (sext i16:$a), (sext i16:$b)),
- (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
+ (MULWIDES32 $a, $b)>,
Requires<[doMulWide]>;
def : Pat<(mul (sext i16:$a), (i32 SInt16Const:$b)),
- (MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>,
+ (MULWIDES32Imm32 $a, (i32 SInt16Const:$b))>,
Requires<[doMulWide]>;
def : Pat<(mul (zext i16:$a), (zext i16:$b)),
- (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
+ (MULWIDEU32 $a, $b)>,
Requires<[doMulWide]>;
def : Pat<(mul (zext i16:$a), (i32 UInt16Const:$b)),
- (MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>,
+ (MULWIDEU32Imm32 $a, (i32 UInt16Const:$b))>,
Requires<[doMulWide]>;
//
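The SHL2MUL transforms above rewrite "extend, then shl by an immediate" as a wide multiply, using x << b == x * (1 << b); constraining b to [0, 30] (resp. [0, 14]) keeps the multiplier a positive immediate. A quick check of the identity:

  #include <cassert>
  #include <cstdint>

  int main() {
    int64_t A = 123456; // stands in for the sign/zero-extended 32-bit source
    int Sh = 7;         // IntConst_0_30 keeps the shift in [0, 30]
    assert((A << Sh) == A * (int64_t(1) << Sh));
    return 0;
  }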
@@ -1242,7 +1242,7 @@ def FDIV64ri :
// fdiv will be converted to rcp
// fneg (fdiv 1.0, X) => fneg (rcp.rn X)
def : Pat<(fdiv DoubleConstNeg1:$a, f64:$b),
- (FNEGf64 (FDIV641r (NegDoubleConst node:$a), Float64Regs:$b))>;
+ (FNEGf64 (FDIV641r (NegDoubleConst node:$a), $b))>;
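This fold is sound because IEEE division is sign-symmetric, so -1.0/x equals -(1.0/x) for any non-NaN x, e.g.:

  #include <cassert>

  int main() {
    double X = 8.0;
    assert(-1.0 / X == -(1.0 / X)); // fneg (fdiv 1.0, X) == fneg (rcp X)
    return 0;
  }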
//
// F32 Approximate reciprocal
@@ -1436,83 +1436,83 @@ def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
// frem - f32 FTZ
def : Pat<(frem f32:$x, f32:$y),
- (FSUBf32rr_ftz Float32Regs:$x, (FMULf32rr_ftz (CVT_f32_f32
- (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRZI_FTZ),
- Float32Regs:$y))>,
+ (FSUBf32rr_ftz $x, (FMULf32rr_ftz (CVT_f32_f32
+ (FDIV32rr_prec_ftz $x, $y), CvtRZI_FTZ),
+ $y))>,
Requires<[doF32FTZ, allowUnsafeFPMath]>;
def : Pat<(frem f32:$x, fpimm:$y),
- (FSUBf32rr_ftz Float32Regs:$x, (FMULf32ri_ftz (CVT_f32_f32
- (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRZI_FTZ),
+ (FSUBf32rr_ftz $x, (FMULf32ri_ftz (CVT_f32_f32
+ (FDIV32ri_prec_ftz $x, fpimm:$y), CvtRZI_FTZ),
fpimm:$y))>,
Requires<[doF32FTZ, allowUnsafeFPMath]>;
-def : Pat<(frem f32:$x, Float32Regs:$y),
- (SELP_f32rr Float32Regs:$x,
- (FSUBf32rr_ftz Float32Regs:$x, (FMULf32rr_ftz (CVT_f32_f32
- (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRZI_FTZ),
- Float32Regs:$y)),
- (TESTINF_f32r Float32Regs:$y))>,
+def : Pat<(frem f32:$x, f32:$y),
+ (SELP_f32rr $x,
+ (FSUBf32rr_ftz $x, (FMULf32rr_ftz (CVT_f32_f32
+ (FDIV32rr_prec_ftz $x, $y), CvtRZI_FTZ),
+ $y)),
+ (TESTINF_f32r $y))>,
Requires<[doF32FTZ, noUnsafeFPMath]>;
def : Pat<(frem f32:$x, fpimm:$y),
- (SELP_f32rr Float32Regs:$x,
- (FSUBf32rr_ftz Float32Regs:$x, (FMULf32ri_ftz (CVT_f32_f32
- (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRZI_FTZ),
+ (SELP_f32rr $x,
+ (FSUBf32rr_ftz $x, (FMULf32ri_ftz (CVT_f32_f32
+ (FDIV32ri_prec_ftz $x, fpimm:$y), CvtRZI_FTZ),
fpimm:$y)),
(TESTINF_f32i fpimm:$y))>,
Requires<[doF32FTZ, noUnsafeFPMath]>;
// frem - f32
def : Pat<(frem f32:$x, f32:$y),
- (FSUBf32rr Float32Regs:$x, (FMULf32rr (CVT_f32_f32
- (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRZI),
- Float32Regs:$y))>,
+ (FSUBf32rr $x, (FMULf32rr (CVT_f32_f32
+ (FDIV32rr_prec $x, $y), CvtRZI),
+ $y))>,
Requires<[allowUnsafeFPMath]>;
def : Pat<(frem f32:$x, fpimm:$y),
- (FSUBf32rr Float32Regs:$x, (FMULf32ri (CVT_f32_f32
- (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRZI),
+ (FSUBf32rr $x, (FMULf32ri (CVT_f32_f32
+ (FDIV32ri_prec $x, fpimm:$y), CvtRZI),
fpimm:$y))>,
Requires<[allowUnsafeFPMath]>;
def : Pat<(frem f32:$x, f32:$y),
- (SELP_f32rr Float32Regs:$x,
- (FSUBf32rr Float32Regs:$x, (FMULf32rr (CVT_f32_f32
- (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRZI),
- Float32Regs:$y)),
+ (SELP_f32rr $x,
+ (FSUBf32rr $x, (FMULf32rr (CVT_f32_f32
+ (FDIV32rr_prec $x, $y), CvtRZI),
+ $y)),
(TESTINF_f32r Float32Regs:$y))>,
Requires<[noUnsafeFPMath]>;
def : Pat<(frem f32:$x, fpimm:$y),
- (SELP_f32rr Float32Regs:$x,
- (FSUBf32rr Float32Regs:$x, (FMULf32ri (CVT_f32_f32
- (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRZI),
+ (SELP_f32rr $x,
+ (FSUBf32rr $x, (FMULf32ri (CVT_f32_f32
+ (FDIV32ri_prec $x, fpimm:$y), CvtRZI),
fpimm:$y)),
(TESTINF_f32i fpimm:$y))>,
Requires<[noUnsafeFPMath]>;
// frem - f64
def : Pat<(frem f64:$x, f64:$y),
- (FSUBf64rr Float64Regs:$x, (FMULf64rr (CVT_f64_f64
- (FDIV64rr Float64Regs:$x, Float64Regs:$y), CvtRZI),
- Float64Regs:$y))>,
+ (FSUBf64rr $x, (FMULf64rr (CVT_f64_f64
+ (FDIV64rr $x, $y), CvtRZI),
+ $y))>,
Requires<[allowUnsafeFPMath]>;
def : Pat<(frem f64:$x, fpimm:$y),
- (FSUBf64rr Float64Regs:$x, (FMULf64ri (CVT_f64_f64
- (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRZI),
+ (FSUBf64rr $x, (FMULf64ri (CVT_f64_f64
+ (FDIV64ri $x, fpimm:$y), CvtRZI),
fpimm:$y))>,
Requires<[allowUnsafeFPMath]>;
def : Pat<(frem f64:$x, f64:$y),
- (SELP_f64rr Float64Regs:$x,
- (FSUBf64rr Float64Regs:$x, (FMULf64rr (CVT_f64_f64
- (FDIV64rr Float64Regs:$x, Float64Regs:$y), CvtRZI),
- Float64Regs:$y)),
+ (SELP_f64rr $x,
+ (FSUBf64rr $x, (FMULf64rr (CVT_f64_f64
+ (FDIV64rr $x, $y), CvtRZI),
+ $y)),
(TESTINF_f64r Float64Regs:$y))>,
Requires<[noUnsafeFPMath]>;
def : Pat<(frem f64:$x, fpimm:$y),
- (SELP_f64rr Float64Regs:$x,
- (FSUBf64rr Float64Regs:$x, (FMULf64ri (CVT_f64_f64
- (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRZI),
+ (SELP_f64rr $x,
+ (FSUBf64rr $x, (FMULf64ri (CVT_f64_f64
+ (FDIV64ri $x, fpimm:$y), CvtRZI),
fpimm:$y)),
- (TESTINF_f64r Float64Regs:$y))>,
+ (TESTINF_f64r $y))>,
Requires<[noUnsafeFPMath]>;
//-----------------------------------
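All of the frem expansions above compute fmod(x, y) as x - trunc(x / y) * y; the SELP/TESTINF variants additionally pass x through when y is infinite, as the precise definition requires. A worked instance, exact for these representable values (in general the division rounds, hence the allowUnsafeFPMath gating of the plain form):

  #include <cassert>
  #include <cmath>

  int main() {
    double X = 5.75, Y = 1.5;
    double R = X - std::trunc(X / Y) * Y; // the expansion the patterns emit
    assert(R == std::fmod(X, Y));         // both yield 1.25
    return 0;
  }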
@@ -1561,32 +1561,32 @@ defm AND : BITWISE<"and", and>;
defm XOR : BITWISE<"xor", xor>;
// PTX does not support mul on predicates, convert to and instructions
-def : Pat<(mul i1:$a, i1:$b), (ANDb1rr Int1Regs:$a, Int1Regs:$b)>;
-def : Pat<(mul i1:$a, imm:$b), (ANDb1ri Int1Regs:$a, imm:$b)>;
+def : Pat<(mul i1:$a, i1:$b), (ANDb1rr $a, $b)>;
+def : Pat<(mul i1:$a, imm:$b), (ANDb1ri $a, imm:$b)>;
// These transformations were once reliably performed by instcombine, but thanks
// to poison semantics they are no longer safe for LLVM IR, so we perform them
// here instead.
-def : Pat<(select i1:$a, i1:$b, 0), (ANDb1rr Int1Regs:$a, Int1Regs:$b)>;
-def : Pat<(select i1:$a, 1, i1:$b), (ORb1rr Int1Regs:$a, Int1Regs:$b)>;
+def : Pat<(select i1:$a, i1:$b, 0), (ANDb1rr $a, $b)>;
+def : Pat<(select i1:$a, 1, i1:$b), (ORb1rr $a, $b)>;
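These select folds are plain Boolean identities once poison is out of the picture (which holds here, after selection): in IR, and %a, %b propagates poison from %b even when %a is false, while the select does not. The identities themselves:

  #include <cassert>

  int main() {
    for (bool A : {false, true})
      for (bool B : {false, true}) {
        assert((A ? B : false) == (A && B)); // select $a, $b, 0  ==>  and
        assert((A ? true : B) == (A || B));  // select $a, 1, $b  ==>  or
      }
    return 0;
  }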
// Lower logical v2i16/v4i8 ops as bitwise ops on b32.
foreach vt = [v2i16, v4i8] in {
def: Pat<(or vt:$a, vt:$b),
- (ORb32rr Int32Regs:$a, Int32Regs:$b)>;
+ (ORb32rr $a, $b)>;
def: Pat<(xor vt:$a, vt:$b),
- (XORb32rr Int32Regs:$a, Int32Regs:$b)>;
+ (XORb32rr $a, $b)>;
def: Pat<(and vt:$a, vt:$b),
- (ANDb32rr Int32Regs:$a, Int32Regs:$b)>;
+ (ANDb32rr $a, $b)>;
// The constants get legalized into a bitcast from i32, so that's what we need
// to match here.
def: Pat<(or vt:$a, (vt (bitconvert (i32 imm:$b)))),
- (ORb32ri Int32Regs:$a, imm:$b)>;
+ (ORb32ri $a, imm:$b)>;
def: Pat<(xor vt:$a, (vt (bitconvert (i32 imm:$b)))),
- (XORb32ri Int32Regs:$a, imm:$b)>;
+ (XORb32ri $a, imm:$b)>;
def: Pat<(and vt:$a, (vt (bitconvert (i32 imm:$b)))),
- (ANDb32ri Int32Regs:$a, imm:$b)>;
+ (ANDb32ri $a, imm:$b)>;
}
def NOT1 : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
@@ -1770,34 +1770,34 @@ let hasSideEffects = false in {
// byte extraction + signed/unsigned extension to i32.
def : Pat<(i32 (sext_inreg (bfe i32:$s, i32:$o, 8), i8)),
- (BFE_S32rri Int32Regs:$s, Int32Regs:$o, 8)>;
+ (BFE_S32rri $s, $o, 8)>;
def : Pat<(i32 (sext_inreg (bfe i32:$s, imm:$o, 8), i8)),
- (BFE_S32rii Int32Regs:$s, imm:$o, 8)>;
+ (BFE_S32rii $s, imm:$o, 8)>;
def : Pat<(i32 (and (bfe i32:$s, i32:$o, 8), 255)),
- (BFE_U32rri Int32Regs:$s, Int32Regs:$o, 8)>;
+ (BFE_U32rri $s, $o, 8)>;
def : Pat<(i32 (and (bfe i32:$s, imm:$o, 8), 255)),
- (BFE_U32rii Int32Regs:$s, imm:$o, 8)>;
+ (BFE_U32rii $s, imm:$o, 8)>;
// byte extraction + signed extension to i16
def : Pat<(i16 (sext_inreg (trunc (bfe i32:$s, imm:$o, 8)), i8)),
- (CVT_s8_s32 (BFE_S32rii i32:$s, imm:$o, 8), CvtNONE)>;
+ (CVT_s8_s32 (BFE_S32rii $s, imm:$o, 8), CvtNONE)>;
// Byte extraction via shift/trunc/sext
def : Pat<(i16 (sext_inreg (trunc i32:$s), i8)),
- (CVT_s8_s32 Int32Regs:$s, CvtNONE)>;
+ (CVT_s8_s32 $s, CvtNONE)>;
def : Pat<(i16 (sext_inreg (trunc (srl i32:$s, (i32 imm:$o))), i8)),
- (CVT_s8_s32 (BFE_S32rii Int32Regs:$s, imm:$o, 8), CvtNONE)>;
+ (CVT_s8_s32 (BFE_S32rii $s, imm:$o, 8), CvtNONE)>;
def : Pat<(sext_inreg (srl i32:$s, (i32 imm:$o)), i8),
- (BFE_S32rii Int32Regs:$s, imm:$o, 8)>;
+ (BFE_S32rii $s, imm:$o, 8)>;
def : Pat<(i16 (sra (i16 (trunc i32:$s)), (i32 8))),
- (CVT_s8_s32 (BFE_S32rii Int32Regs:$s, 8, 8), CvtNONE)>;
+ (CVT_s8_s32 (BFE_S32rii $s, 8, 8), CvtNONE)>;
def : Pat<(sext_inreg (srl i64:$s, (i32 imm:$o)), i8),
- (BFE_S64rii Int64Regs:$s, imm:$o, 8)>;
+ (BFE_S64rii $s, imm:$o, 8)>;
def : Pat<(i16 (sext_inreg (trunc i64:$s), i8)),
- (CVT_s8_s64 Int64Regs:$s, CvtNONE)>;
+ (CVT_s8_s64 $s, CvtNONE)>;
def : Pat<(i16 (sext_inreg (trunc (srl i64:$s, (i32 imm:$o))), i8)),
- (CVT_s8_s64 (BFE_S64rii Int64Regs:$s, imm:$o, 8), CvtNONE)>;
+ (CVT_s8_s64 (BFE_S64rii $s, imm:$o, 8), CvtNONE)>;
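A reference model for what the BFE_S32rii patterns compute: extract 8 bits at bit offset O and sign-extend. (Illustrative helper, not LLVM code.)

  #include <cassert>
  #include <cstdint>

  // Byte at bit offset O of S, sign-extended to 32 bits.
  static int32_t BfeS8(uint32_t S, unsigned O) {
    return (int32_t)(int8_t)(S >> O);
  }

  int main() {
    assert(BfeS8(0x00FF0000, 16) == -1);  // 0xFF sign-extends to -1
    assert(BfeS8(0x007F0000, 16) == 127);
    return 0;
  }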
//-----------------------------------
// Comparison instructions (setp, set)
@@ -2032,47 +2032,47 @@ multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
Instruction set_64ir> {
// i16 -> pred
def : Pat<(i1 (OpNode i16:$a, i16:$b)),
- (setp_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
+ (setp_16rr $a, $b, Mode)>;
def : Pat<(i1 (OpNode i16:$a, imm:$b)),
- (setp_16ri Int16Regs:$a, imm:$b, Mode)>;
+ (setp_16ri $a, imm:$b, Mode)>;
def : Pat<(i1 (OpNode imm:$a, i16:$b)),
- (setp_16ir imm:$a, Int16Regs:$b, Mode)>;
+ (setp_16ir imm:$a, $b, Mode)>;
// i32 -> pred
def : Pat<(i1 (OpNode i32:$a, i32:$b)),
- (setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
+ (setp_32rr $a, $b, Mode)>;
def : Pat<(i1 (OpNode i32:$a, imm:$b)),
- (setp_32ri Int32Regs:$a, imm:$b, Mode)>;
+ (setp_32ri $a, imm:$b, Mode)>;
def : Pat<(i1 (OpNode imm:$a, i32:$b)),
- (setp_32ir imm:$a, Int32Regs:$b, Mode)>;
+ (setp_32ir imm:$a, $b, Mode)>;
// i64 -> pred
def : Pat<(i1 (OpNode i64:$a, i64:$b)),
- (setp_64rr Int64Regs:$a, Int64Regs:$b, Mode)>;
+ (setp_64rr $a, $b, Mode)>;
def : Pat<(i1 (OpNode i64:$a, imm:$b)),
- (setp_64ri Int64Regs:$a, imm:$b, Mode)>;
+ (setp_64ri $a, imm:$b, Mode)>;
def : Pat<(i1 (OpNode imm:$a, i64:$b)),
- (setp_64ir imm:$a, Int64Regs:$b, Mode)>;
+ (setp_64ir imm:$a, $b, Mode)>;
// i16 -> i32
def : Pat<(i32 (OpNode i16:$a, i16:$b)),
- (set_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
+ (set_16rr $a, $b, Mode)>;
def : Pat<(i32 (OpNode i16:$a, imm:$b)),
- (set_16ri Int16Regs:$a, imm:$b, Mode)>;
+ (set_16ri $a, imm:$b, Mode)>;
def : Pat<(i32 (OpNode imm:$a, i16:$b)),
- (set_16ir imm:$a, Int16Regs:$b, Mode)>;
+ (set_16ir imm:$a, $b, Mode)>;
// i32 -> i32
def : Pat<(i32 (OpNode i32:$a, i32:$b)),
- (set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
+ (set_32rr $a, $b, Mode)>;
def : Pat<(i32 (OpNode i32:$a, imm:$b)),
- (set_32ri Int32Regs:$a, imm:$b, Mode)>;
+ (set_32ri $a, imm:$b, Mode)>;
def : Pat<(i32 (OpNode imm:$a, i32:$b)),
- (set_32ir imm:$a, Int32Regs:$b, Mode)>;
+ (set_32ir imm:$a, $b, Mode)>;
// i64 -> i32
def : Pat<(i32 (OpNode i64:$a, Int64Regs:$b)),
- (set_64rr Int64Regs:$a, Int64Regs:$b, Mode)>;
+ (set_64rr $a, $b, Mode)>;
def : Pat<(i32 (OpNode i64:$a, imm:$b)),
- (set_64ri Int64Regs:$a, imm:$b, Mode)>;
+ (set_64ri $a, imm:$b, Mode)>;
def : Pat<(i32 (OpNode imm:$a, i64:$b)),
- (set_64ir imm:$a, Int64Regs:$b, Mode)>;
+ (set_64ir imm:$a, $b, Mode)>;
}
multiclass ISET_FORMAT_SIGNED<PatFrag OpNode, PatLeaf Mode>
@@ -2179,94 +2179,94 @@ def: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
// i1 compare -> i32
def : Pat<(i32 (setne i1:$a, i1:$b)),
- (SELP_u32ii -1, 0, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
+ (SELP_u32ii -1, 0, (XORb1rr $a, $b))>;
def : Pat<(i32 (setne i1:$a, i1:$b)),
- (SELP_u32ii 0, -1, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
+ (SELP_u32ii 0, -1, (XORb1rr $a, $b))>;
multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
// f16 -> pred
def : Pat<(i1 (OpNode f16:$a, f16:$b)),
- (SETP_f16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>,
+ (SETP_f16rr $a, $b, ModeFTZ)>,
Requires<[useFP16Math,doF32FTZ]>;
def : Pat<(i1 (OpNode f16:$a, f16:$b)),
- (SETP_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
+ (SETP_f16rr $a, $b, Mode)>,
Requires<[useFP16Math]>;
// bf16 -> pred
def : Pat<(i1 (OpNode bf16:$a, bf16:$b)),
- (SETP_bf16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>,
+ (SETP_bf16rr $a, $b, ModeFTZ)>,
Requires<[hasBF16Math,doF32FTZ]>;
def : Pat<(i1 (OpNode bf16:$a, bf16:$b)),
- (SETP_bf16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
+ (SETP_bf16rr $a, $b, Mode)>,
Requires<[hasBF16Math]>;
// f32 -> pred
def : Pat<(i1 (OpNode f32:$a, f32:$b)),
- (SETP_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
+ (SETP_f32rr $a, $b, ModeFTZ)>,
Requires<[doF32FTZ]>;
def : Pat<(i1 (OpNode f32:$a, f32:$b)),
- (SETP_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>;
+ (SETP_f32rr $a, $b, Mode)>;
def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)),
- (SETP_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>,
+ (SETP_f32ri $a, fpimm:$b, ModeFTZ)>,
Requires<[doF32FTZ]>;
def : Pat<(i1 (OpNode f32:$a, fpimm:$b)),
- (SETP_f32ri Float32Regs:$a, fpimm:$b, Mode)>;
+ (SETP_f32ri $a, fpimm:$b, Mode)>;
def : Pat<(i1 (OpNode fpimm:$a, f32:$b)),
- (SETP_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>,
+ (SETP_f32ir fpimm:$a, $b, ModeFTZ)>,
Requires<[doF32FTZ]>;
def : Pat<(i1 (OpNode fpimm:$a, f32:$b)),
- (SETP_f32ir fpimm:$a, Float32Regs:$b, Mode)>;
+ (SETP_f32ir fpimm:$a, $b, Mode)>;
// f64 -> pred
def : Pat<(i1 (OpNode f64:$a, f64:$b)),
- (SETP_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>;
+ (SETP_f64rr $a, $b, Mode)>;
def : Pat<(i1 (OpNode f64:$a, fpimm:$b)),
- (SETP_f64ri Float64Regs:$a, fpimm:$b, Mode)>;
+ (SETP_f64ri $a, fpimm:$b, Mode)>;
def : Pat<(i1 (OpNode fpimm:$a, f64:$b)),
- (SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
+ (SETP_f64ir fpimm:$a, $b, Mode)>;
// f16 -> i32
def : Pat<(i32 (OpNode f16:$a, f16:$b)),
- (SET_f16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>,
+ (SET_f16rr $a, $b, ModeFTZ)>,
Requires<[useFP16Math, doF32FTZ]>;
def : Pat<(i32 (OpNode f16:$a, f16:$b)),
- (SET_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
+ (SET_f16rr $a, $b, Mode)>,
Requires<[useFP16Math]>;
// bf16 -> i32
def : Pat<(i32 (OpNode bf16:$a, bf16:$b)),
- (SET_bf16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>,
+ (SET_bf16rr $a, $b, ModeFTZ)>,
Requires<[hasBF16Math, doF32FTZ]>;
def : Pat<(i32 (OpNode bf16:$a, bf16:$b)),
- (SET_bf16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
+ (SET_bf16rr $a, $b, Mode)>,
Requires<[hasBF16Math]>;
// f32 -> i32
def : Pat<(i32 (OpNode f32:$a, f32:$b)),
- (SET_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
+ (SET_f32rr $a, $b, ModeFTZ)>,
Requires<[doF32FTZ]>;
def : Pat<(i32 (OpNode f32:$a, f32:$b)),
- (SET_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>;
+ (SET_f32rr $a, $b, Mode)>;
def : Pat<(i32 (OpNode f32:$a, fpimm:$b)),
- (SET_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>,
+ (SET_f32ri $a, fpimm:$b, ModeFTZ)>,
Requires<[doF32FTZ]>;
def : Pat<(i32 (OpNode f32:$a, fpimm:$b)),
- (SET_f32ri Float32Regs:$a, fpimm:$b, Mode)>;
+ (SET_f32ri $a, fpimm:$b, Mode)>;
def : Pat<(i32 (OpNode fpimm:$a, f32:$b)),
- (SET_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>,
+ (SET_f32ir fpimm:$a, $b, ModeFTZ)>,
Requires<[doF32FTZ]>;
def : Pat<(i32 (OpNode fpimm:$a, f32:$b)),
- (SET_f32ir fpimm:$a, Float32Regs:$b, Mode)>;
+ (SET_f32ir fpimm:$a, $b, Mode)>;
// f64 -> i32
def : Pat<(i32 (OpNode f64:$a, f64:$b)),
- (SET_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>;
+ (SET_f64rr $a, $b, Mode)>;
def : Pat<(i32 (OpNode f64:$a, fpimm:$b)),
- (SET_f64ri Float64Regs:$a, fpimm:$b, Mode)>;
+ (SET_f64ri $a, fpimm:$b, Mode)>;
def : Pat<(i32 (OpNode fpimm:$a, f64:$b)),
- (SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
+ (SET_f64ir fpimm:$a, $b, Mode)>;
}
defm FSetOGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
@@ -2722,11 +2722,11 @@ def ProxyRegF32 : ProxyRegInst<"f32", f32, Float32Regs>;
def ProxyRegF64 : ProxyRegInst<"f64", f64, Float64Regs>;
foreach vt = [f16, bf16] in {
- def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI16 Int16Regs:$src)>;
+ def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI16 $src)>;
}
foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
- def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI32 Int32Regs:$src)>;
+ def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI32 $src)>;
}
//
@@ -3029,9 +3029,9 @@ def BITCONVERT_64_F2I : F_BITCONVERT<"64", f64, i64>;
foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
def: Pat<(vt (bitconvert (f32 Float32Regs:$a))),
- (BITCONVERT_32_F2I Float32Regs:$a)>;
+ (BITCONVERT_32_F2I $a)>;
def: Pat<(f32 (bitconvert vt:$a)),
- (BITCONVERT_32_I2F Int32Regs:$a)>;
+ (BITCONVERT_32_I2F $a)>;
}
foreach vt = [f16, bf16] in {
def: Pat<(vt (bitconvert i16:$a)),
@@ -3056,280 +3056,280 @@ foreach ta = [v2f16, v2bf16, v2i16, v4i8, i32] in {
// sint -> f16
def : Pat<(f16 (sint_to_fp i1:$a)),
- (CVT_f16_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
+ (CVT_f16_s32 (SELP_s32ii -1, 0, $a), CvtRN)>;
def : Pat<(f16 (sint_to_fp Int16Regs:$a)),
- (CVT_f16_s16 i16:$a, CvtRN)>;
+ (CVT_f16_s16 $a, CvtRN)>;
def : Pat<(f16 (sint_to_fp i32:$a)),
- (CVT_f16_s32 i32:$a, CvtRN)>;
+ (CVT_f16_s32 $a, CvtRN)>;
def : Pat<(f16 (sint_to_fp i64:$a)),
- (CVT_f16_s64 i64:$a, CvtRN)>;
+ (CVT_f16_s64 $a, CvtRN)>;
// uint -> f16
def : Pat<(f16 (uint_to_fp i1:$a)),
- (CVT_f16_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
+ (CVT_f16_u32 (SELP_u32ii 1, 0, $a), CvtRN)>;
def : Pat<(f16 (uint_to_fp Int16Regs:$a)),
- (CVT_f16_u16 i16:$a, CvtRN)>;
+ (CVT_f16_u16 $a, CvtRN)>;
def : Pat<(f16 (uint_to_fp i32:$a)),
- (CVT_f16_u32 i32:$a, CvtRN)>;
+ (CVT_f16_u32 $a, CvtRN)>;
def : Pat<(f16 (uint_to_fp i64:$a)),
- (CVT_f16_u64 i64:$a, CvtRN)>;
+ (CVT_f16_u64 $a, CvtRN)>;
// sint -> bf16
def : Pat<(bf16 (sint_to_fp i1:$a)),
- (CVT_bf16_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
+ (CVT_bf16_s32 (SELP_u32ii 1, 0, $a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
def : Pat<(bf16 (sint_to_fp i16:$a)),
- (CVT_bf16_s16 i16:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
+ (CVT_bf16_s16 $a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
def : Pat<(bf16 (sint_to_fp i32:$a)),
- (CVT_bf16_s32 i32:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
+ (CVT_bf16_s32 $a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
def : Pat<(bf16 (sint_to_fp i64:$a)),
- (CVT_bf16_s64 i64:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
+ (CVT_bf16_s64 $a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
// uint -> bf16
def : Pat<(bf16 (uint_to_fp i1:$a)),
- (CVT_bf16_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
+ (CVT_bf16_u32 (SELP_u32ii 1, 0, $a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
def : Pat<(bf16 (uint_to_fp i16:$a)),
- (CVT_bf16_u16 i16:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
+ (CVT_bf16_u16 $a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
def : Pat<(bf16 (uint_to_fp i32:$a)),
- (CVT_bf16_u32 i32:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
+ (CVT_bf16_u32 $a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
def : Pat<(bf16 (uint_to_fp i64:$a)),
- (CVT_bf16_u64 i64:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
+ (CVT_bf16_u64 $a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
// sint -> f32
def : Pat<(f32 (sint_to_fp i1:$a)),
- (CVT_f32_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
+ (CVT_f32_s32 (SELP_s32ii -1, 0, $a), CvtRN)>;
def : Pat<(f32 (sint_to_fp i16:$a)),
- (CVT_f32_s16 i16:$a, CvtRN)>;
+ (CVT_f32_s16 $a, CvtRN)>;
def : Pat<(f32 (sint_to_fp i32:$a)),
- (CVT_f32_s32 i32:$a, CvtRN)>;
+ (CVT_f32_s32 $a, CvtRN)>;
def : Pat<(f32 (sint_to_fp i64:$a)),
- (CVT_f32_s64 i64:$a, CvtRN)>;
+ (CVT_f32_s64 $a, CvtRN)>;
// uint -> f32
def : Pat<(f32 (uint_to_fp i1:$a)),
- (CVT_f32_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
+ (CVT_f32_u32 (SELP_u32ii 1, 0, $a), CvtRN)>;
def : Pat<(f32 (uint_to_fp i16:$a)),
- (CVT_f32_u16 Int16Regs:$a, CvtRN)>;
+ (CVT_f32_u16 $a, CvtRN)>;
def : Pat<(f32 (uint_to_fp i32:$a)),
- (CVT_f32_u32 i32:$a, CvtRN)>;
+ (CVT_f32_u32 $a, CvtRN)>;
def : Pat<(f32 (uint_to_fp i64:$a)),
- (CVT_f32_u64 i64:$a, CvtRN)>;
+ (CVT_f32_u64 $a, CvtRN)>;
// sint -> f64
def : Pat<(f64 (sint_to_fp i1:$a)),
- (CVT_f64_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
+ (CVT_f64_s32 (SELP_s32ii -1, 0, $a), CvtRN)>;
def : Pat<(f64 (sint_to_fp i16:$a)),
- (CVT_f64_s16 Int16Regs:$a, CvtRN)>;
+ (CVT_f64_s16 $a, CvtRN)>;
def : Pat<(f64 (sint_to_fp i32:$a)),
- (CVT_f64_s32 i32:$a, CvtRN)>;
+ (CVT_f64_s32 $a, CvtRN)>;
def : Pat<(f64 (sint_to_fp i64:$a)),
- (CVT_f64_s64 i64:$a, CvtRN)>;
+ (CVT_f64_s64 $a, CvtRN)>;
// uint -> f64
def : Pat<(f64 (uint_to_fp i1:$a)),
- (CVT_f64_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
+ (CVT_f64_u32 (SELP_u32ii 1, 0, $a), CvtRN)>;
def : Pat<(f64 (uint_to_fp i16:$a)),
- (CVT_f64_u16 Int16Regs:$a, CvtRN)>;
+ (CVT_f64_u16 $a, CvtRN)>;
def : Pat<(f64 (uint_to_fp i32:$a)),
- (CVT_f64_u32 i32:$a, CvtRN)>;
+ (CVT_f64_u32 $a, CvtRN)>;
def : Pat<(f64 (uint_to_fp i64:$a)),
- (CVT_f64_u64 i64:$a, CvtRN)>;
+ (CVT_f64_u64 $a, CvtRN)>;
// f16 -> sint
def : Pat<(i1 (fp_to_sint f16:$a)),
- (SETP_b16ri Int16Regs:$a, 0, CmpEQ)>;
+ (SETP_b16ri $a, 0, CmpEQ)>;
def : Pat<(i16 (fp_to_sint f16:$a)),
- (CVT_s16_f16 Int16Regs:$a, CvtRZI)>;
+ (CVT_s16_f16 $a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint f16:$a)),
- (CVT_s32_f16 Int16Regs:$a, CvtRZI)>;
+ (CVT_s32_f16 $a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint f16:$a)),
- (CVT_s64_f16 Int16Regs:$a, CvtRZI)>;
+ (CVT_s64_f16 $a, CvtRZI)>;
// f16 -> uint
def : Pat<(i1 (fp_to_uint f16:$a)),
- (SETP_b16ri Int16Regs:$a, 0, CmpEQ)>;
+ (SETP_b16ri $a, 0, CmpEQ)>;
def : Pat<(i16 (fp_to_uint f16:$a)),
- (CVT_u16_f16 Int16Regs:$a, CvtRZI)>;
+ (CVT_u16_f16 $a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint f16:$a)),
- (CVT_u32_f16 Int16Regs:$a, CvtRZI)>;
+ (CVT_u32_f16 $a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint f16:$a)),
- (CVT_u64_f16 Int16Regs:$a, CvtRZI)>;
+ (CVT_u64_f16 $a, CvtRZI)>;
// bf16 -> sint
def : Pat<(i1 (fp_to_sint bf16:$a)),
- (SETP_b16ri Int16Regs:$a, 0, CmpEQ)>;
+ (SETP_b16ri $a, 0, CmpEQ)>;
def : Pat<(i16 (fp_to_sint bf16:$a)),
- (CVT_s16_bf16 Int16Regs:$a, CvtRZI)>;
+ (CVT_s16_bf16 $a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint bf16:$a)),
- (CVT_s32_bf16 Int16Regs:$a, CvtRZI)>;
+ (CVT_s32_bf16 $a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint bf16:$a)),
- (CVT_s64_bf16 Int16Regs:$a, CvtRZI)>;
+ (CVT_s64_bf16 $a, CvtRZI)>;
// bf16 -> uint
def : Pat<(i1 (fp_to_uint bf16:$a)),
- (SETP_b16ri Int16Regs:$a, 0, CmpEQ)>;
+ (SETP_b16ri $a, 0, CmpEQ)>;
def : Pat<(i16 (fp_to_uint bf16:$a)),
- (CVT_u16_bf16 Int16Regs:$a, CvtRZI)>;
+ (CVT_u16_bf16 $a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint bf16:$a)),
- (CVT_u32_bf16 Int16Regs:$a, CvtRZI)>;
+ (CVT_u32_bf16 $a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint bf16:$a)),
- (CVT_u64_bf16 Int16Regs:$a, CvtRZI)>;
+ (CVT_u64_bf16 $a, CvtRZI)>;
// f32 -> sint
def : Pat<(i1 (fp_to_sint f32:$a)),
- (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>;
+ (SETP_b32ri (BITCONVERT_32_F2I $a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_sint f32:$a)),
- (CVT_s16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+ (CVT_s16_f32 $a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i16 (fp_to_sint f32:$a)),
- (CVT_s16_f32 Float32Regs:$a, CvtRZI)>;
+ (CVT_s16_f32 $a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint f32:$a)),
- (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+ (CVT_s32_f32 $a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i32 (fp_to_sint f32:$a)),
- (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
+ (CVT_s32_f32 $a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint f32:$a)),
- (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+ (CVT_s64_f32 $a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i64 (fp_to_sint f32:$a)),
- (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
+ (CVT_s64_f32 $a, CvtRZI)>;
// f32 -> uint
def : Pat<(i1 (fp_to_uint f32:$a)),
- (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>;
+ (SETP_b32ri (BITCONVERT_32_F2I $a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_uint f32:$a)),
- (CVT_u16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+ (CVT_u16_f32 $a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i16 (fp_to_uint f32:$a)),
- (CVT_u16_f32 Float32Regs:$a, CvtRZI)>;
+ (CVT_u16_f32 $a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint f32:$a)),
- (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+ (CVT_u32_f32 $a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i32 (fp_to_uint f32:$a)),
- (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
+ (CVT_u32_f32 $a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint f32:$a)),
- (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+ (CVT_u64_f32 $a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i64 (fp_to_uint f32:$a)),
- (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
+ (CVT_u64_f32 $a, CvtRZI)>;
// f64 -> sint
def : Pat<(i1 (fp_to_sint f64:$a)),
- (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>;
+ (SETP_b64ri (BITCONVERT_64_F2I $a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_sint f64:$a)),
- (CVT_s16_f64 Float64Regs:$a, CvtRZI)>;
+ (CVT_s16_f64 $a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint f64:$a)),
- (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
+ (CVT_s32_f64 $a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint f64:$a)),
- (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
+ (CVT_s64_f64 $a, CvtRZI)>;
// f64 -> uint
def : Pat<(i1 (fp_to_uint f64:$a)),
- (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>;
+ (SETP_b64ri (BITCONVERT_64_F2I $a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_uint f64:$a)),
- (CVT_u16_f64 Float64Regs:$a, CvtRZI)>;
+ (CVT_u16_f64 $a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint f64:$a)),
- (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
+ (CVT_u32_f64 $a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint f64:$a)),
- (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
+ (CVT_u64_f64 $a, CvtRZI)>;
// sext i1
def : Pat<(i16 (sext i1:$a)),
- (SELP_s16ii -1, 0, Int1Regs:$a)>;
+ (SELP_s16ii -1, 0, $a)>;
def : Pat<(i32 (sext i1:$a)),
- (SELP_s32ii -1, 0, Int1Regs:$a)>;
+ (SELP_s32ii -1, 0, $a)>;
def : Pat<(i64 (sext i1:$a)),
- (SELP_s64ii -1, 0, Int1Regs:$a)>;
+ (SELP_s64ii -1, 0, $a)>;
// zext i1
def : Pat<(i16 (zext i1:$a)),
- (SELP_u16ii 1, 0, Int1Regs:$a)>;
+ (SELP_u16ii 1, 0, $a)>;
def : Pat<(i32 (zext i1:$a)),
- (SELP_u32ii 1, 0, Int1Regs:$a)>;
+ (SELP_u32ii 1, 0, $a)>;
def : Pat<(i64 (zext i1:$a)),
- (SELP_u64ii 1, 0, Int1Regs:$a)>;
+ (SELP_u64ii 1, 0, $a)>;
// anyext i1
def : Pat<(i16 (anyext i1:$a)),
- (SELP_u16ii -1, 0, Int1Regs:$a)>;
+ (SELP_u16ii -1, 0, $a)>;
def : Pat<(i32 (anyext i1:$a)),
- (SELP_u32ii -1, 0, Int1Regs:$a)>;
+ (SELP_u32ii -1, 0, $a)>;
def : Pat<(i64 (anyext i1:$a)),
- (SELP_u64ii -1, 0, Int1Regs:$a)>;
+ (SELP_u64ii -1, 0, $a)>;
// sext i16
def : Pat<(i32 (sext i16:$a)),
- (CVT_s32_s16 Int16Regs:$a, CvtNONE)>;
+ (CVT_s32_s16 $a, CvtNONE)>;
def : Pat<(i64 (sext i16:$a)),
- (CVT_s64_s16 Int16Regs:$a, CvtNONE)>;
+ (CVT_s64_s16 $a, CvtNONE)>;
// zext i16
def : Pat<(i32 (zext i16:$a)),
- (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
+ (CVT_u32_u16 $a, CvtNONE)>;
def : Pat<(i64 (zext i16:$a)),
- (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;
+ (CVT_u64_u16 $a, CvtNONE)>;
// anyext i16
def : Pat<(i32 (anyext i16:$a)),
- (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
+ (CVT_u32_u16 $a, CvtNONE)>;
def : Pat<(i64 (anyext i16:$a)),
- (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;
+ (CVT_u64_u16 $a, CvtNONE)>;
// sext i32
def : Pat<(i64 (sext i32:$a)),
- (CVT_s64_s32 Int32Regs:$a, CvtNONE)>;
+ (CVT_s64_s32 $a, CvtNONE)>;
// zext i32
def : Pat<(i64 (zext i32:$a)),
- (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;
+ (CVT_u64_u32 $a, CvtNONE)>;
// anyext i32
def : Pat<(i64 (anyext i32:$a)),
- (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;
+ (CVT_u64_u32 $a, CvtNONE)>;
// truncate i64
def : Pat<(i32 (trunc i64:$a)),
- (CVT_u32_u64 Int64Regs:$a, CvtNONE)>;
+ (CVT_u32_u64 $a, CvtNONE)>;
def : Pat<(i16 (trunc i64:$a)),
- (CVT_u16_u64 Int64Regs:$a, CvtNONE)>;
+ (CVT_u16_u64 $a, CvtNONE)>;
def : Pat<(i1 (trunc i64:$a)),
- (SETP_b64ri (ANDb64ri Int64Regs:$a, 1), 1, CmpEQ)>;
+ (SETP_b64ri (ANDb64ri $a, 1), 1, CmpEQ)>;
// truncate i32
def : Pat<(i16 (trunc i32:$a)),
- (CVT_u16_u32 Int32Regs:$a, CvtNONE)>;
+ (CVT_u16_u32 $a, CvtNONE)>;
def : Pat<(i1 (trunc i32:$a)),
- (SETP_b32ri (ANDb32ri Int32Regs:$a, 1), 1, CmpEQ)>;
+ (SETP_b32ri (ANDb32ri $a, 1), 1, CmpEQ)>;
// truncate i16
def : Pat<(i1 (trunc i16:$a)),
- (SETP_b16ri (ANDb16ri Int16Regs:$a, 1), 1, CmpEQ)>;
+ (SETP_b16ri (ANDb16ri $a, 1), 1, CmpEQ)>;
// sext_inreg
-def : Pat<(sext_inreg i16:$a, i8), (CVT_INREG_s16_s8 Int16Regs:$a)>;
-def : Pat<(sext_inreg i32:$a, i8), (CVT_INREG_s32_s8 Int32Regs:$a)>;
-def : Pat<(sext_inreg i32:$a, i16), (CVT_INREG_s32_s16 Int32Regs:$a)>;
-def : Pat<(sext_inreg i64:$a, i8), (CVT_INREG_s64_s8 Int64Regs:$a)>;
-def : Pat<(sext_inreg i64:$a, i16), (CVT_INREG_s64_s16 Int64Regs:$a)>;
-def : Pat<(sext_inreg i64:$a, i32), (CVT_INREG_s64_s32 Int64Regs:$a)>;
+def : Pat<(sext_inreg i16:$a, i8), (CVT_INREG_s16_s8 $a)>;
+def : Pat<(sext_inreg i32:$a, i8), (CVT_INREG_s32_s8 $a)>;
+def : Pat<(sext_inreg i32:$a, i16), (CVT_INREG_s32_s16 $a)>;
+def : Pat<(sext_inreg i64:$a, i8), (CVT_INREG_s64_s8 $a)>;
+def : Pat<(sext_inreg i64:$a, i16), (CVT_INREG_s64_s16 $a)>;
+def : Pat<(sext_inreg i64:$a, i32), (CVT_INREG_s64_s32 $a)>;
// Select instructions with 32-bit predicates
def : Pat<(select i32:$pred, i16:$a, i16:$b),
- (SELP_b16rr Int16Regs:$a, Int16Regs:$b,
- (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
+ (SELP_b16rr $a, $b,
+ (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
def : Pat<(select i32:$pred, i32:$a, i32:$b),
- (SELP_b32rr Int32Regs:$a, Int32Regs:$b,
- (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
+ (SELP_b32rr $a, $b,
+ (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
def : Pat<(select i32:$pred, i64:$a, i64:$b),
- (SELP_b64rr Int64Regs:$a, Int64Regs:$b,
- (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
+ (SELP_b64rr $a, $b,
+ (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
def : Pat<(select i32:$pred, f16:$a, f16:$b),
- (SELP_f16rr Int16Regs:$a, Int16Regs:$b,
- (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
+ (SELP_f16rr $a, $b,
+ (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
def : Pat<(select i32:$pred, bf16:$a, bf16:$b),
- (SELP_bf16rr Int16Regs:$a, Int16Regs:$b,
- (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
+ (SELP_bf16rr $a, $b,
+ (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
def : Pat<(select i32:$pred, f32:$a, f32:$b),
- (SELP_f32rr Float32Regs:$a, Float32Regs:$b,
- (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
+ (SELP_f32rr $a, $b,
+ (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
def : Pat<(select i32:$pred, f64:$a, f64:$b),
- (SELP_f64rr Float64Regs:$a, Float64Regs:$b,
- (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
+ (SELP_f64rr $a, $b,
+ (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
let hasSideEffects = false in {
@@ -3391,32 +3391,32 @@ let hasSideEffects = false in {
// Using a partial vectorized move produces better SASS code for extraction of
// the upper/lower parts of an integer.
def : Pat<(i16 (trunc (srl i32:$s, (i32 16)))),
- (I32toI16H Int32Regs:$s)>;
+ (I32toI16H $s)>;
def : Pat<(i16 (trunc (sra i32:$s, (i32 16)))),
- (I32toI16H Int32Regs:$s)>;
+ (I32toI16H $s)>;
def : Pat<(i32 (trunc (srl i64:$s, (i32 32)))),
- (I64toI32H Int64Regs:$s)>;
+ (I64toI32H $s)>;
def : Pat<(i32 (trunc (sra i64:$s, (i32 32)))),
- (I64toI32H Int64Regs:$s)>;
+ (I64toI32H $s)>;
def: Pat<(i32 (sext (extractelt v2i16:$src, 0))),
- (CVT_INREG_s32_s16 Int32Regs:$src)>;
+ (CVT_INREG_s32_s16 $src)>;
foreach vt = [v2f16, v2bf16, v2i16] in {
def : Pat<(extractelt vt:$src, 0),
- (I32toI16L Int32Regs:$src)>;
+ (I32toI16L $src)>;
def : Pat<(extractelt vt:$src, 1),
- (I32toI16H Int32Regs:$src)>;
+ (I32toI16H $src)>;
}
def : Pat<(v2f16 (build_vector f16:$a, f16:$b)),
- (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>;
+ (V2I16toI32 $a, $b)>;
def : Pat<(v2bf16 (build_vector bf16:$a, bf16:$b)),
- (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>;
+ (V2I16toI32 $a, $b)>;
def : Pat<(v2i16 (build_vector i16:$a, i16:$b)),
- (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>;
+ (V2I16toI32 $a, $b)>;
def: Pat<(v2i16 (scalar_to_vector i16:$a)),
- (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
+ (CVT_u32_u16 $a, CvtNONE)>;
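The high-half extractions these patterns select (I32toI16H / I64toI32H) are shift-and-truncate in disguise; the partial move just lets the backend read the half directly. Reference behavior:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t S = 0xABCD1234;
    assert((uint16_t)(S >> 16) == 0xABCDu);     // I32toI16H
    uint64_t T = 0x11223344AABBCCDDull;
    assert((uint32_t)(T >> 32) == 0x11223344u); // I64toI32H
    return 0;
  }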
//
// Funnel-Shift
@@ -3455,13 +3455,13 @@ let hasSideEffects = false in {
}
def : Pat<(i32 (int_nvvm_fshl_clamp i32:$hi, i32:$lo, i32:$amt)),
- (SHF_L_CLAMP_r Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt)>;
+ (SHF_L_CLAMP_r $lo, $hi, $amt)>;
def : Pat<(i32 (int_nvvm_fshl_clamp i32:$hi, i32:$lo, (i32 imm:$amt))),
- (SHF_L_CLAMP_i Int32Regs:$lo, Int32Regs:$hi, imm:$amt)>;
+ (SHF_L_CLAMP_i $lo, $hi, imm:$amt)>;
def : Pat<(i32 (int_nvvm_fshr_clamp i32:$hi, i32:$lo, i32:$amt)),
- (SHF_R_CLAMP_r Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt)>;
+ (SHF_R_CLAMP_r $lo, $hi, $amt)>;
def : Pat<(i32 (int_nvvm_fshr_clamp i32:$hi, i32:$lo, (i32 imm:$amt))),
- (SHF_R_CLAMP_i Int32Regs:$lo, Int32Regs:$hi, imm:$amt)>;
+ (SHF_R_CLAMP_i $lo, $hi, imm:$amt)>;
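Reference semantics assumed for the clamped funnel shift above, per the PTX shf.l.clamp description: shift the 64-bit value hi:lo left by min(amt, 32) and keep the upper word (note the patterns swap the intrinsic's $hi/$lo into the instruction's operand order):

  #include <cassert>
  #include <cstdint>

  static uint32_t FshlClamp(uint32_t Hi, uint32_t Lo, uint32_t Amt) {
    Amt = Amt > 32 ? 32 : Amt;
    uint64_t V = ((uint64_t)Hi << 32) | Lo;
    return (uint32_t)((V << Amt) >> 32);
  }

  int main() {
    assert(FshlClamp(0x00000001, 0x80000000, 1) == 0x00000003);
    assert(FshlClamp(0xDEADBEEF, 0x12345678, 32) == 0x12345678); // clamp passes lo through
    return 0;
  }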
// Count leading zeros
let hasSideEffects = false in {
@@ -3472,14 +3472,14 @@ let hasSideEffects = false in {
}
// 32-bit has a direct PTX instruction
-def : Pat<(i32 (ctlz i32:$a)), (CLZr32 i32:$a)>;
+def : Pat<(i32 (ctlz i32:$a)), (CLZr32 $a)>;
// The return type of the ctlz ISD node is the same as its input, but the PTX
// clz instruction always returns a 32-bit value. For ctlz.i64, convert the
// ptx value to 64 bits to match the ISD node's semantics, unless we know we're
// truncating back down to 32 bits.
-def : Pat<(i64 (ctlz i64:$a)), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
-def : Pat<(i32 (trunc (i64 (ctlz i64:$a)))), (CLZr64 Int64Regs:$a)>;
+def : Pat<(i64 (ctlz i64:$a)), (CVT_u64_u32 (CLZr64 $a), CvtNONE)>;
+def : Pat<(i32 (trunc (i64 (ctlz i64:$a)))), (CLZr64 $a)>;
// For 16-bit ctlz, we zero-extend to 32-bit, perform the count, then trunc the
// result back to 16-bits if necessary. We also need to subtract 16 because
@@ -3497,9 +3497,9 @@ def : Pat<(i32 (trunc (i64 (ctlz i64:$a)))), (CLZr64 Int64Regs:$a)>;
// "mov b32reg, {b16imm, b16reg}", so we don't do this optimization.
def : Pat<(i16 (ctlz i16:$a)),
(SUBi16ri (CVT_u16_u32
- (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE), 16)>;
+ (CLZr32 (CVT_u32_u16 $a, CvtNONE)), CvtNONE), 16)>;
def : Pat<(i32 (zext (i16 (ctlz i16:$a)))),
- (SUBi32ri (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), 16)>;
+ (SUBi32ri (CLZr32 (CVT_u32_u16 $a, CvtNONE)), 16)>;
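The subtract-16 corrects for counting on the widened value: zero-extending a 16-bit operand prepends 16 leading zeros, so clz32(zext(x)) == clz16(x) + 16. For example (C++20 <bit>):

  #include <bit>
  #include <cassert>
  #include <cstdint>

  int main() {
    uint16_t A = 0x00F0; // 8 leading zeros in 16 bits
    assert(std::countl_zero((uint32_t)A) - 16 == std::countl_zero(A));
    return 0;
  }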
// Population count
let hasSideEffects = false in {
@@ -3510,67 +3510,67 @@ let hasSideEffects = false in {
}
// 32-bit has a direct PTX instruction
-def : Pat<(i32 (ctpop i32:$a)), (POPCr32 Int32Regs:$a)>;
+def : Pat<(i32 (ctpop i32:$a)), (POPCr32 $a)>;
// For 64-bit, the result in PTX is actually 32-bit so we zero-extend to 64-bit
// to match the LLVM semantics. Just as with ctlz.i64, we provide a second
// pattern that avoids the type conversion if we're truncating the result to
// i32 anyway.
-def : Pat<(ctpop i64:$a), (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>;
-def : Pat<(i32 (trunc (i64 (ctpop i64:$a)))), (POPCr64 Int64Regs:$a)>;
+def : Pat<(ctpop i64:$a), (CVT_u64_u32 (POPCr64 $a), CvtNONE)>;
+def : Pat<(i32 (trunc (i64 (ctpop i64:$a)))), (POPCr64 $a)>;
// For 16-bit, we zero-extend to 32-bit, then trunc the result back to 16-bits.
// If we know that we're storing into an i32, we can avoid the final trunc.
def : Pat<(ctpop i16:$a),
- (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>;
+ (CVT_u16_u32 (POPCr32 (CVT_u32_u16 $a, CvtNONE)), CvtNONE)>;
def : Pat<(i32 (zext (i16 (ctpop i16:$a)))),
- (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE))>;
+ (POPCr32 (CVT_u32_u16 $a, CvtNONE))>;
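The same widening trick needs no correction for popcount, since zero-extension adds no set bits:

  #include <bit>
  #include <cassert>
  #include <cstdint>

  int main() {
    uint16_t A = 0xF0F0;
    assert(std::popcount((uint32_t)A) == std::popcount(A)); // both 8
    return 0;
  }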
// fpround f32 -> f16
def : Pat<(f16 (fpround f32:$a)),
- (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
+ (CVT_f16_f32 $a, CvtRN)>;
// fpround f32 -> bf16
def : Pat<(bf16 (fpround f32:$a)),
- (CVT_bf16_f32 Float32Regs:$a, CvtRN)>, Requires<[hasPTX<70>, hasSM<80>]>;
+ (CVT_bf16_f32 $a, CvtRN)>, Requires<[hasPTX<70>, hasSM<80>]>;
// fpround f64 -> f16
def : Pat<(f16 (fpround f64:$a)),
- (CVT_f16_f64 Float64Regs:$a, CvtRN)>;
+ (CVT_f16_f64 $a, CvtRN)>;
// fpround f64 -> bf16
def : Pat<(bf16 (fpround f64:$a)),
- (CVT_bf16_f64 Float64Regs:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
+ (CVT_bf16_f64 $a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
// fpround f64 -> f32
def : Pat<(f32 (fpround f64:$a)),
- (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
+ (CVT_f32_f64 $a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(f32 (fpround f64:$a)),
- (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
+ (CVT_f32_f64 $a, CvtRN)>;
// fpextend f16 -> f32
def : Pat<(f32 (fpextend f16:$a)),
- (CVT_f32_f16 Int16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
+ (CVT_f32_f16 $a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(f32 (fpextend f16:$a)),
- (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
+ (CVT_f32_f16 $a, CvtNONE)>;
// fpextend bf16 -> f32
def : Pat<(f32 (fpextend bf16:$a)),
- (CVT_f32_bf16 Int16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
+ (CVT_f32_bf16 $a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(f32 (fpextend bf16:$a)),
- (CVT_f32_bf16 Int16Regs:$a, CvtNONE)>, Requires<[hasPTX<71>, hasSM<80>]>;
+ (CVT_f32_bf16 $a, CvtNONE)>, Requires<[hasPTX<71>, hasSM<80>]>;
// fpextend f16 -> f64
def : Pat<(f64 (fpextend f16:$a)),
- (CVT_f64_f16 Int16Regs:$a, CvtNONE)>;
+ (CVT_f64_f16 $a, CvtNONE)>;
// fpextend bf16 -> f64
def : Pat<(f64 (fpextend bf16:$a)),
- (CVT_f64_bf16 Int16Regs:$a, CvtNONE)>, Requires<[hasPTX<78>, hasSM<90>]>;
+ (CVT_f64_bf16 $a, CvtNONE)>, Requires<[hasPTX<78>, hasSM<90>]>;
// fpextend f32 -> f64
def : Pat<(f64 (fpextend f32:$a)),
- (CVT_f64_f32 Float32Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
+ (CVT_f64_f32 $a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(f64 (fpextend f32:$a)),
- (CVT_f64_f32 Float32Regs:$a, CvtNONE)>;
+ (CVT_f64_f32 $a, CvtNONE)>;
def retglue : SDNode<"NVPTXISD::RET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
@@ -3579,15 +3579,15 @@ def retglue : SDNode<"NVPTXISD::RET_GLUE", SDTNone,
multiclass CVT_ROUND<SDNode OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
def : Pat<(OpNode f16:$a),
- (CVT_f16_f16 Int16Regs:$a, Mode)>;
+ (CVT_f16_f16 $a, Mode)>;
def : Pat<(OpNode bf16:$a),
- (CVT_bf16_bf16 Int16Regs:$a, Mode)>;
+ (CVT_bf16_bf16 $a, Mode)>;
def : Pat<(OpNode f32:$a),
- (CVT_f32_f32 Float32Regs:$a, ModeFTZ)>, Requires<[doF32FTZ]>;
+ (CVT_f32_f32 $a, ModeFTZ)>, Requires<[doF32FTZ]>;
def : Pat<(OpNode f32:$a),
- (CVT_f32_f32 Float32Regs:$a, Mode)>, Requires<[doNoF32FTZ]>;
+ (CVT_f32_f32 $a, Mode)>, Requires<[doNoF32FTZ]>;
def : Pat<(OpNode f64:$a),
- (CVT_f64_f64 Float64Regs:$a, Mode)>;
+ (CVT_f64_f64 $a, Mode)>;
}
defm : CVT_ROUND<fceil, CvtRPI, CvtRPI_FTZ>;
@@ -3624,7 +3624,7 @@ let isTerminator=1 in {
}
def : Pat<(brcond i32:$a, bb:$target),
- (CBranch (SETP_u32ri Int32Regs:$a, 0, CmpNE), bb:$target)>;
+ (CBranch (SETP_u32ri $a, 0, CmpNE), bb:$target)>;
// SelectionDAGBuilder::visitSWitchCase() will invert the condition of a
// conditional branch if the target block is the next block so that the code
@@ -3632,7 +3632,7 @@ def : Pat<(brcond i32:$a, bb:$target),
// condition, 1', which will be translated to (setne condition, -1). Since ptx
// supports '@!pred bra target', we should use it.
def : Pat<(brcond (i1 (setne i1:$a, -1)), bb:$target),
- (CBranchOther i1:$a, bb:$target)>;
+ (CBranchOther $a, bb:$target)>;
// Call
def SDT_NVPTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>,
@@ -3830,17 +3830,17 @@ include "NVPTXIntrinsics.td"
def : Pat <
(i32 (bswap i32:$a)),
- (INT_NVVM_PRMT Int32Regs:$a, (i32 0), (i32 0x0123))>;
+ (INT_NVVM_PRMT $a, (i32 0), (i32 0x0123))>;
def : Pat <
(v2i16 (bswap v2i16:$a)),
- (INT_NVVM_PRMT Int32Regs:$a, (i32 0), (i32 0x2301))>;
+ (INT_NVVM_PRMT $a, (i32 0), (i32 0x2301))>;
def : Pat <
(i64 (bswap i64:$a)),
(V2I32toI64
- (INT_NVVM_PRMT (I64toI32H Int64Regs:$a), (i32 0), (i32 0x0123)),
- (INT_NVVM_PRMT (I64toI32L Int64Regs:$a), (i32 0), (i32 0x0123)))>;
+ (INT_NVVM_PRMT (I64toI32H $a), (i32 0), (i32 0x0123)),
+ (INT_NVVM_PRMT (I64toI32L $a), (i32 0), (i32 0x0123)))>;
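A reference model for the PRMT-based bswap above: each selector nibble picks one byte of the 8-byte pair {b, a}, so 0x0123 reverses the four bytes of a and 0x2301 swaps the bytes within each 16-bit half. (Sketch of prmt's default mode; the sign-replicate selector bit is ignored here.)

  #include <cassert>
  #include <cstdint>

  static uint32_t Prmt(uint32_t A, uint32_t B, uint32_t Sel) {
    uint64_t V = ((uint64_t)B << 32) | A; // bytes 0-3 from A, 4-7 from B
    uint32_t R = 0;
    for (int I = 0; I < 4; ++I) {
      unsigned Byte = (Sel >> (4 * I)) & 0x7;
      R |= (uint32_t)((V >> (8 * Byte)) & 0xFF) << (8 * I);
    }
    return R;
  }

  int main() {
    assert(Prmt(0x11223344, 0, 0x0123) == 0x44332211); // i32 bswap
    assert(Prmt(0x11223344, 0, 0x2301) == 0x22114433); // v2i16 bswap
    return 0;
  }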
////////////////////////////////////////////////////////////////////////////////
@@ -3910,18 +3910,18 @@ def FMARELU_BF16X2 : NVPTXInst_rrr<Int32Regs, "fma.rn.relu.bf16x2", [hasBF16Math
// FTZ
def : Pat<(f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan f16:$a, f16:$b, f16:$c), fpimm_any_zero)),
- (FMARELU_F16_FTZ Int16Regs:$a, Int16Regs:$b, Int16Regs:$c)>,
+ (FMARELU_F16_FTZ $a, $b, $c)>,
Requires<[doF32FTZ]>;
def : Pat<(v2f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan v2f16:$a, v2f16:$b, v2f16:$c), fpimm_positive_zero_v2f16)),
- (FMARELU_F16X2_FTZ Int32Regs:$a, Int32Regs:$b, Int32Regs:$c)>,
+ (FMARELU_F16X2_FTZ $a, $b, $c)>,
Requires<[doF32FTZ]>;
// NO FTZ
def : Pat<(f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan f16:$a, f16:$b, f16:$c), fpimm_any_zero)),
- (FMARELU_F16 Int16Regs:$a, Int16Regs:$b, Int16Regs:$c)>;
+ (FMARELU_F16 $a, $b, $c)>;
def : Pat<(bf16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan bf16:$a, bf16:$b, bf16:$c), fpimm_any_zero)),
- (FMARELU_BF16 Int16Regs:$a, Int16Regs:$b, Int16Regs:$c)>;
+ (FMARELU_BF16 $a, $b, $c)>;
def : Pat<(v2f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan v2f16:$a, v2f16:$b, v2f16:$c), fpimm_positive_zero_v2f16)),
- (FMARELU_F16X2 Int32Regs:$a, Int32Regs:$b, Int32Regs:$c)>;
+ (FMARELU_F16X2 $a, $b, $c)>;
def : Pat<(v2bf16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan v2bf16:$a, v2bf16:$b, v2bf16:$c), fpimm_positive_zero_v2bf16)),
- (FMARELU_BF16X2 Int32Regs:$a, Int32Regs:$b, Int32Regs:$c)>;
+ (FMARELU_BF16X2 $a, $b, $c)>;
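What the fma.rn.relu patterns fuse: a single-use, no-NaNs FMA followed by a max with zero. Scalar reference:

  #include <algorithm>
  #include <cassert>
  #include <cmath>

  int main() {
    float A = 2.0f, B = -3.0f, C = 1.0f;
    float R = std::max(std::fma(A, B, C), 0.0f); // folded into fma.rn.relu
    assert(R == 0.0f);                           // fma gives -5, relu clamps to 0
    return 0;
  }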
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 0773c1b..8ede1ec 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -824,29 +824,29 @@ def MBARRIER_PENDING_COUNT :
def : Pat<(int_nvvm_fmin_f immFloat1,
(int_nvvm_fmax_f immFloat0, f32:$a)),
- (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
+ (CVT_f32_f32 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1,
(int_nvvm_fmax_f f32:$a, immFloat0)),
- (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
+ (CVT_f32_f32 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
(int_nvvm_fmax_f immFloat0, f32:$a), immFloat1),
- (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
+ (CVT_f32_f32 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
(int_nvvm_fmax_f f32:$a, immFloat0), immFloat1),
- (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
+ (CVT_f32_f32 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
(int_nvvm_fmax_d immDouble0, f64:$a)),
- (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
+ (CVT_f64_f64 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
(int_nvvm_fmax_d f64:$a, immDouble0)),
- (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
+ (CVT_f64_f64 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
(int_nvvm_fmax_d immDouble0, f64:$a), immDouble1),
- (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
+ (CVT_f64_f64 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
(int_nvvm_fmax_d f64:$a, immDouble0), immDouble1),
- (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
+ (CVT_f64_f64 $a, CvtSAT)>;
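All eight patterns above recognize the same clamp-to-[0,1] idiom, in either nesting and operand order, and fold it to a single saturating convert:

  #include <algorithm>
  #include <cassert>

  int main() {
    assert(std::min(1.0f, std::max(0.0f, 1.75f)) == 1.0f); // cvt.sat upper clamp
    assert(std::min(1.0f, std::max(0.0f, -0.5f)) == 0.0f); // cvt.sat lower clamp
    return 0;
  }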
// We need a full string for OpcStr here because we need to deal with case like
@@ -1125,16 +1125,16 @@ def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
def : Pat<(int_nvvm_div_full f32:$a, f32:$b),
- (FDIV32rr Float32Regs:$a, Float32Regs:$b)>;
+ (FDIV32rr $a, $b)>;
def : Pat<(int_nvvm_div_full f32:$a, fpimm:$b),
- (FDIV32ri Float32Regs:$a, f32imm:$b)>;
+ (FDIV32ri $a, f32imm:$b)>;
def : Pat<(int_nvvm_div_full_ftz f32:$a, f32:$b),
- (FDIV32rr_ftz Float32Regs:$a, Float32Regs:$b)>;
+ (FDIV32rr_ftz $a, $b)>;
def : Pat<(int_nvvm_div_full_ftz f32:$a, fpimm:$b),
- (FDIV32ri_ftz Float32Regs:$a, f32imm:$b)>;
+ (FDIV32ri_ftz $a, f32imm:$b)>;
//
// Sad
@@ -1158,18 +1158,18 @@ def INT_NVVM_SAD_ULL : F_MATH_3<"sad.u64 \t$dst, $src0, $src1, $src2;",
//
def : Pat<(int_nvvm_floor_ftz_f f32:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
+ (CVT_f32_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f f32:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
+ (CVT_f32_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d f64:$a),
- (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
+ (CVT_f64_f64 $a, CvtRMI)>;
def : Pat<(int_nvvm_ceil_ftz_f f32:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
+ (CVT_f32_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f f32:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
+ (CVT_f32_f32 $a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d f64:$a),
- (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
+ (CVT_f64_f64 $a, CvtRPI)>;
//
// Abs
@@ -1217,33 +1217,33 @@ def INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $src0;", Int32Regs,
//
def : Pat<(int_nvvm_round_ftz_f f32:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
+ (CVT_f32_f32 $a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f f32:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
+ (CVT_f32_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_round_d f64:$a),
- (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
+ (CVT_f64_f64 $a, CvtRNI)>;
//
// Trunc
//
def : Pat<(int_nvvm_trunc_ftz_f f32:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
+ (CVT_f32_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f f32:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
+ (CVT_f32_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d f64:$a),
- (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
+ (CVT_f64_f64 $a, CvtRZI)>;
//
// Saturate
//
def : Pat<(int_nvvm_saturate_ftz_f f32:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
+ (CVT_f32_f32 $a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f f32:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
+ (CVT_f32_f32 $a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d f64:$a),
- (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
+ (CVT_f64_f64 $a, CvtSAT)>;
//
// Exp2 Log2
@@ -1430,13 +1430,13 @@ def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
// nvvm_sqrt intrinsic
def : Pat<(int_nvvm_sqrt_f f32:$a),
- (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
+ (INT_NVVM_SQRT_RN_FTZ_F $a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f f32:$a),
- (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
+ (INT_NVVM_SQRT_RN_F $a)>, Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f f32:$a),
- (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
+ (INT_NVVM_SQRT_APPROX_FTZ_F $a)>, Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f f32:$a),
- (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
+ (INT_NVVM_SQRT_APPROX_F $a)>;
//
// Rsqrt
@@ -1456,24 +1456,24 @@ def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
// 1.0f / sqrt_approx -> rsqrt_approx
def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_f f32:$a)),
- (INT_NVVM_RSQRT_APPROX_F Float32Regs:$a)>,
+ (INT_NVVM_RSQRT_APPROX_F $a)>,
Requires<[doRsqrtOpt]>;
def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_ftz_f f32:$a)),
- (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$a)>,
+ (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
Requires<[doRsqrtOpt]>;
// Same for int_nvvm_sqrt_f when a non-precise sqrt is requested
def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f f32:$a)),
- (INT_NVVM_RSQRT_APPROX_F Float32Regs:$a)>,
+ (INT_NVVM_RSQRT_APPROX_F $a)>,
Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>;
def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f f32:$a)),
- (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$a)>,
+ (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>;
def: Pat<(fdiv FloatConst1, (fsqrt f32:$a)),
- (INT_NVVM_RSQRT_APPROX_F Float32Regs:$a)>,
+ (INT_NVVM_RSQRT_APPROX_F $a)>,
Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>;
def: Pat<(fdiv FloatConst1, (fsqrt f32:$a)),
- (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$a)>,
+ (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>;
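The shape all of these rsqrt patterns match is the plain reciprocal of a square root:

  #include <cassert>
  #include <cmath>

  int main() {
    float A = 4.0f;
    assert(1.0f / std::sqrt(A) == 0.5f); // 1.0 / sqrt(x), folded to rsqrt.approx
    return 0;
  }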
//
// Add
@@ -1529,136 +1529,136 @@ foreach t = [I32RT, I64RT] in {
//
def : Pat<(int_nvvm_d2f_rn_ftz f64:$a),
- (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
+ (CVT_f32_f64 $a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn f64:$a),
- (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
+ (CVT_f32_f64 $a, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz f64:$a),
- (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
+ (CVT_f32_f64 $a, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz f64:$a),
- (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
+ (CVT_f32_f64 $a, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz f64:$a),
- (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
+ (CVT_f32_f64 $a, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm f64:$a),
- (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
+ (CVT_f32_f64 $a, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz f64:$a),
- (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
+ (CVT_f32_f64 $a, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp f64:$a),
- (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
+ (CVT_f32_f64 $a, CvtRP)>;
def : Pat<(int_nvvm_d2i_rn f64:$a),
- (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
+ (CVT_s32_f64 $a, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz f64:$a),
- (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
+ (CVT_s32_f64 $a, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm f64:$a),
- (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
+ (CVT_s32_f64 $a, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp f64:$a),
- (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
+ (CVT_s32_f64 $a, CvtRPI)>;
def : Pat<(int_nvvm_d2ui_rn f64:$a),
- (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
+ (CVT_u32_f64 $a, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz f64:$a),
- (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
+ (CVT_u32_f64 $a, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm f64:$a),
- (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
+ (CVT_u32_f64 $a, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp f64:$a),
- (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
+ (CVT_u32_f64 $a, CvtRPI)>;
def : Pat<(int_nvvm_i2d_rn i32:$a),
- (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
+ (CVT_f64_s32 $a, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz i32:$a),
- (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
+ (CVT_f64_s32 $a, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm i32:$a),
- (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
+ (CVT_f64_s32 $a, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp i32:$a),
- (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
+ (CVT_f64_s32 $a, CvtRP)>;
def : Pat<(int_nvvm_ui2d_rn i32:$a),
- (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
+ (CVT_f64_u32 $a, CvtRN)>;
def : Pat<(int_nvvm_ui2d_rz i32:$a),
- (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
+ (CVT_f64_u32 $a, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm i32:$a),
- (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
+ (CVT_f64_u32 $a, CvtRM)>;
def : Pat<(int_nvvm_ui2d_rp i32:$a),
- (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
+ (CVT_f64_u32 $a, CvtRP)>;
def : Pat<(int_nvvm_f2i_rn_ftz f32:$a),
- (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
+ (CVT_s32_f32 $a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn f32:$a),
- (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
+ (CVT_s32_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz f32:$a),
- (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
+ (CVT_s32_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz f32:$a),
- (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
+ (CVT_s32_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz f32:$a),
- (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
+ (CVT_s32_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm f32:$a),
- (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
+ (CVT_s32_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz f32:$a),
- (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
+ (CVT_s32_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp f32:$a),
- (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
+ (CVT_s32_f32 $a, CvtRPI)>;
def : Pat<(int_nvvm_f2ui_rn_ftz f32:$a),
- (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
+ (CVT_u32_f32 $a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn f32:$a),
- (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
+ (CVT_u32_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz f32:$a),
- (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
+ (CVT_u32_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz f32:$a),
- (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
+ (CVT_u32_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz f32:$a),
- (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
+ (CVT_u32_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm f32:$a),
- (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
+ (CVT_u32_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz f32:$a),
- (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
+ (CVT_u32_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp f32:$a),
- (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
+ (CVT_u32_f32 $a, CvtRPI)>;
def : Pat<(int_nvvm_i2f_rn i32:$a),
- (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
+ (CVT_f32_s32 $a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz i32:$a),
- (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
+ (CVT_f32_s32 $a, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm i32:$a),
- (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
+ (CVT_f32_s32 $a, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp i32:$a),
- (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
+ (CVT_f32_s32 $a, CvtRP)>;
def : Pat<(int_nvvm_ui2f_rn i32:$a),
- (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
+ (CVT_f32_u32 $a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz i32:$a),
- (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
+ (CVT_f32_u32 $a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm i32:$a),
- (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
+ (CVT_f32_u32 $a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp i32:$a),
- (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
+ (CVT_f32_u32 $a, CvtRP)>;
def : Pat<(int_nvvm_ff2bf16x2_rn f32:$a, f32:$b),
- (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
+ (CVT_bf16x2_f32 $a, $b, CvtRN)>;
def : Pat<(int_nvvm_ff2bf16x2_rn_relu f32:$a, f32:$b),
- (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
+ (CVT_bf16x2_f32 $a, $b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff2bf16x2_rz f32:$a, f32:$b),
- (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
+ (CVT_bf16x2_f32 $a, $b, CvtRZ)>;
def : Pat<(int_nvvm_ff2bf16x2_rz_relu f32:$a, f32:$b),
- (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
+ (CVT_bf16x2_f32 $a, $b, CvtRZ_RELU)>;
def : Pat<(int_nvvm_ff2f16x2_rn f32:$a, f32:$b),
- (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
+ (CVT_f16x2_f32 $a, $b, CvtRN)>;
def : Pat<(int_nvvm_ff2f16x2_rn_relu f32:$a, f32:$b),
- (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
+ (CVT_f16x2_f32 $a, $b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff2f16x2_rz f32:$a, f32:$b),
- (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
+ (CVT_f16x2_f32 $a, $b, CvtRZ)>;
def : Pat<(int_nvvm_ff2f16x2_rz_relu f32:$a, f32:$b),
- (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
+ (CVT_f16x2_f32 $a, $b, CvtRZ_RELU)>;
def : Pat<(int_nvvm_f2bf16_rn f32:$a),
- (CVT_bf16_f32 Float32Regs:$a, CvtRN)>;
+ (CVT_bf16_f32 $a, CvtRN)>;
def : Pat<(int_nvvm_f2bf16_rn_relu f32:$a),
- (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>;
+ (CVT_bf16_f32 $a, CvtRN_RELU)>;
def : Pat<(int_nvvm_f2bf16_rz f32:$a),
- (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>;
+ (CVT_bf16_f32 $a, CvtRZ)>;
def : Pat<(int_nvvm_f2bf16_rz_relu f32:$a),
- (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>;
+ (CVT_bf16_f32 $a, CvtRZ_RELU)>;
def CVT_tf32_f32 :
NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a),
@@ -1682,125 +1682,125 @@ def INT_NVVM_D2I_HI : F_MATH_1<
Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
def : Pat<(int_nvvm_f2ll_rn_ftz f32:$a),
- (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
+ (CVT_s64_f32 $a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn f32:$a),
- (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
+ (CVT_s64_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz f32:$a),
- (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
+ (CVT_s64_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz f32:$a),
- (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
+ (CVT_s64_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz f32:$a),
- (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
+ (CVT_s64_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm f32:$a),
- (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
+ (CVT_s64_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz f32:$a),
- (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
+ (CVT_s64_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp f32:$a),
- (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
+ (CVT_s64_f32 $a, CvtRPI)>;
def : Pat<(int_nvvm_f2ull_rn_ftz f32:$a),
- (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
+ (CVT_u64_f32 $a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn f32:$a),
- (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
+ (CVT_u64_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz f32:$a),
- (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
+ (CVT_u64_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz f32:$a),
- (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
+ (CVT_u64_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz f32:$a),
- (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
+ (CVT_u64_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm f32:$a),
- (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
+ (CVT_u64_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz f32:$a),
- (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
+ (CVT_u64_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp f32:$a),
- (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
+ (CVT_u64_f32 $a, CvtRPI)>;
def : Pat<(int_nvvm_d2ll_rn f64:$a),
- (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
+ (CVT_s64_f64 $a, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz f64:$a),
- (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
+ (CVT_s64_f64 $a, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm f64:$a),
- (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
+ (CVT_s64_f64 $a, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp f64:$a),
- (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
+ (CVT_s64_f64 $a, CvtRPI)>;
def : Pat<(int_nvvm_d2ull_rn f64:$a),
- (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
+ (CVT_u64_f64 $a, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz f64:$a),
- (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
+ (CVT_u64_f64 $a, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm f64:$a),
- (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
+ (CVT_u64_f64 $a, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp f64:$a),
- (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
+ (CVT_u64_f64 $a, CvtRPI)>;
def : Pat<(int_nvvm_ll2f_rn i64:$a),
- (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
+ (CVT_f32_s64 $a, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz i64:$a),
- (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
+ (CVT_f32_s64 $a, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm i64:$a),
- (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
+ (CVT_f32_s64 $a, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp i64:$a),
- (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
+ (CVT_f32_s64 $a, CvtRP)>;
def : Pat<(int_nvvm_ull2f_rn i64:$a),
- (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
+ (CVT_f32_u64 $a, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz i64:$a),
- (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
+ (CVT_f32_u64 $a, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm i64:$a),
- (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
+ (CVT_f32_u64 $a, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp i64:$a),
- (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
+ (CVT_f32_u64 $a, CvtRP)>;
def : Pat<(int_nvvm_ll2d_rn i64:$a),
- (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
+ (CVT_f64_s64 $a, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz i64:$a),
- (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
+ (CVT_f64_s64 $a, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm i64:$a),
- (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
+ (CVT_f64_s64 $a, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp i64:$a),
- (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
+ (CVT_f64_s64 $a, CvtRP)>;
def : Pat<(int_nvvm_ull2d_rn i64:$a),
- (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
+ (CVT_f64_u64 $a, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz i64:$a),
- (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
+ (CVT_f64_u64 $a, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm i64:$a),
- (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
+ (CVT_f64_u64 $a, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp i64:$a),
- (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
+ (CVT_f64_u64 $a, CvtRP)>;
def : Pat<(int_nvvm_f2h_rn_ftz f32:$a),
- (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
+ (CVT_f16_f32 $a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_f2h_rn f32:$a),
- (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
+ (CVT_f16_f32 $a, CvtRN)>;
def : Pat<(int_nvvm_ff_to_e4m3x2_rn f32:$a, f32:$b),
- (CVT_e4m3x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
+ (CVT_e4m3x2_f32 $a, $b, CvtRN)>;
def : Pat<(int_nvvm_ff_to_e4m3x2_rn_relu f32:$a, f32:$b),
- (CVT_e4m3x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
+ (CVT_e4m3x2_f32 $a, $b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff_to_e5m2x2_rn f32:$a, f32:$b),
- (CVT_e5m2x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
+ (CVT_e5m2x2_f32 $a, $b, CvtRN)>;
def : Pat<(int_nvvm_ff_to_e5m2x2_rn_relu f32:$a, f32:$b),
- (CVT_e5m2x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
+ (CVT_e5m2x2_f32 $a, $b, CvtRN_RELU)>;
def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn Int32Regs:$a),
- (CVT_e4m3x2_f16x2 Int32Regs:$a, CvtRN)>;
+ (CVT_e4m3x2_f16x2 $a, CvtRN)>;
def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn_relu Int32Regs:$a),
- (CVT_e4m3x2_f16x2 Int32Regs:$a, CvtRN_RELU)>;
+ (CVT_e4m3x2_f16x2 $a, CvtRN_RELU)>;
def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn Int32Regs:$a),
- (CVT_e5m2x2_f16x2 Int32Regs:$a, CvtRN)>;
+ (CVT_e5m2x2_f16x2 $a, CvtRN)>;
def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn_relu Int32Regs:$a),
- (CVT_e5m2x2_f16x2 Int32Regs:$a, CvtRN_RELU)>;
+ (CVT_e5m2x2_f16x2 $a, CvtRN_RELU)>;
def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn Int16Regs:$a),
- (CVT_f16x2_e4m3x2 Int16Regs:$a, CvtRN)>;
+ (CVT_f16x2_e4m3x2 $a, CvtRN)>;
def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn_relu Int16Regs:$a),
- (CVT_f16x2_e4m3x2 Int16Regs:$a, CvtRN_RELU)>;
+ (CVT_f16x2_e4m3x2 $a, CvtRN_RELU)>;
def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn Int16Regs:$a),
- (CVT_f16x2_e5m2x2 Int16Regs:$a, CvtRN)>;
+ (CVT_f16x2_e5m2x2 $a, CvtRN)>;
def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn_relu Int16Regs:$a),
- (CVT_f16x2_e5m2x2 Int16Regs:$a, CvtRN_RELU)>;
+ (CVT_f16x2_e5m2x2 $a, CvtRN_RELU)>;
//
// FNS
@@ -1823,9 +1823,9 @@ def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$
def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
(int_nvvm_fns imm:$mask, i32:$base, i32:$offset)>;
def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
- (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
+ (int_nvvm_fns imm:$mask, i32:$base, imm:$offset)>;
def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
- (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
+ (int_nvvm_fns imm:$mask, imm:$base, i32:$offset)>;
def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
(int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
@@ -2796,10 +2796,10 @@ defm cvta_to_const : G_TO_NG<"const">;
defm cvta_param : NG_TO_G<"param">;
def : Pat<(int_nvvm_ptr_param_to_gen i32:$src),
- (cvta_param Int32Regs:$src)>;
+ (cvta_param $src)>;
def : Pat<(int_nvvm_ptr_param_to_gen i64:$src),
- (cvta_param_64 Int64Regs:$src)>;
+ (cvta_param_64 $src)>;
// nvvm.ptr.gen.to.param
def : Pat<(int_nvvm_ptr_gen_to_param i32:$src),
@@ -2933,8 +2933,8 @@ def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
def : Pat<(int_nvvm_swap_lo_hi_b64 i64:$src),
- (V2I32toI64 (I64toI32H Int64Regs:$src),
- (I64toI32L Int64Regs:$src))> ;
+ (V2I32toI64 (I64toI32H $src),
+ (I64toI32L $src))> ;
//-----------------------------------
// Texture Intrinsics
@@ -5040,21 +5040,21 @@ def TXQ_NUM_MIPMAP_LEVELS_I
}
def : Pat<(int_nvvm_txq_channel_order i64:$a),
- (TXQ_CHANNEL_ORDER_R i64:$a)>;
+ (TXQ_CHANNEL_ORDER_R $a)>;
def : Pat<(int_nvvm_txq_channel_data_type i64:$a),
- (TXQ_CHANNEL_DATA_TYPE_R i64:$a)>;
+ (TXQ_CHANNEL_DATA_TYPE_R $a)>;
def : Pat<(int_nvvm_txq_width i64:$a),
- (TXQ_WIDTH_R i64:$a)>;
+ (TXQ_WIDTH_R $a)>;
def : Pat<(int_nvvm_txq_height i64:$a),
- (TXQ_HEIGHT_R i64:$a)>;
+ (TXQ_HEIGHT_R $a)>;
def : Pat<(int_nvvm_txq_depth i64:$a),
- (TXQ_DEPTH_R i64:$a)>;
+ (TXQ_DEPTH_R $a)>;
def : Pat<(int_nvvm_txq_array_size i64:$a),
- (TXQ_ARRAY_SIZE_R i64:$a)>;
+ (TXQ_ARRAY_SIZE_R $a)>;
def : Pat<(int_nvvm_txq_num_samples i64:$a),
- (TXQ_NUM_SAMPLES_R i64:$a)>;
+ (TXQ_NUM_SAMPLES_R $a)>;
def : Pat<(int_nvvm_txq_num_mipmap_levels i64:$a),
- (TXQ_NUM_MIPMAP_LEVELS_R i64:$a)>;
+ (TXQ_NUM_MIPMAP_LEVELS_R $a)>;
//-----------------------------------
@@ -5113,17 +5113,17 @@ def SUQ_ARRAY_SIZE_I
}
def : Pat<(int_nvvm_suq_channel_order i64:$a),
- (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
+ (SUQ_CHANNEL_ORDER_R $a)>;
def : Pat<(int_nvvm_suq_channel_data_type i64:$a),
- (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
+ (SUQ_CHANNEL_DATA_TYPE_R $a)>;
def : Pat<(int_nvvm_suq_width i64:$a),
- (SUQ_WIDTH_R Int64Regs:$a)>;
+ (SUQ_WIDTH_R $a)>;
def : Pat<(int_nvvm_suq_height i64:$a),
- (SUQ_HEIGHT_R Int64Regs:$a)>;
+ (SUQ_HEIGHT_R $a)>;
def : Pat<(int_nvvm_suq_depth i64:$a),
- (SUQ_DEPTH_R Int64Regs:$a)>;
+ (SUQ_DEPTH_R $a)>;
def : Pat<(int_nvvm_suq_array_size i64:$a),
- (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;
+ (SUQ_ARRAY_SIZE_R $a)>;
//===- Handle Query -------------------------------------------------------===//
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 42043ad..74ce6a9 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -34,19 +34,18 @@ void NVPTXSubtarget::anchor() {}
NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
- // Provide the default CPU if we don't have one.
- TargetName = std::string(CPU.empty() ? "sm_30" : CPU);
+ TargetName = std::string(CPU);
- ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS);
+ ParseSubtargetFeatures(getTargetName(), /*TuneCPU=*/getTargetName(), FS);
// Re-map SM version numbers, SmVersion carries the regular SMs which do
// have relative order, while FullSmVersion allows distinguishing sm_90 from
// sm_90a, which would *not* be a subset of sm_91.
SmVersion = getSmVersion();
// Set default to PTX 6.0 (CUDA 9.0)
if (PTXVersion == 0) {
PTXVersion = 60;
}
return *this;
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 7555a23..bbc1cca 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -111,7 +111,12 @@ public:
// - 0 represents base GPU model,
// - non-zero value identifies particular architecture-accelerated variant.
bool hasAAFeatures() const { return getFullSmVersion() % 10; }
- std::string getTargetName() const { return TargetName; }
+
+ // If the user did not provide a target we default to the `sm_30` target.
+ std::string getTargetName() const {
+ return TargetName.empty() ? "sm_30" : TargetName;
+ }
+ bool hasTargetName() const { return !TargetName.empty(); }
// Get maximum value of required alignments among the supported data types.
// From the PTX ISA doc, section 8.2.3:
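A compilable sketch of the accessor pair above (struct name ours, for illustration): the sm_30 default is applied only when the name is read, so hasTargetName() can still report whether the user actually supplied a CPU.

    #include <string>

    struct SubtargetNameSketch {
      std::string TargetName; // empty when no CPU was provided
      // Default only on read; callers that need the raw state use
      // hasTargetName() instead.
      std::string getTargetName() const {
        return TargetName.empty() ? "sm_30" : TargetName;
      }
      bool hasTargetName() const { return !TargetName.empty(); }
    };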
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index b3b2880..6d4b82a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -255,7 +255,10 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerPipelineStartEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
FunctionPassManager FPM;
- FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
+ // We do not want to fold out calls to nvvm.reflect early if the user
+ // has not provided a target architecture just yet.
+ if (Subtarget.hasTargetName())
+ FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
// Note: NVVMIntrRangePass was causing numerical discrepancies at one
// point, if issues crop up, consider disabling.
FPM.addPass(NVVMIntrRangePass());
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 98bffd9..0f2bec7 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -311,11 +311,13 @@ std::optional<unsigned> getMaxNReg(const Function &F) {
}
bool isKernelFunction(const Function &F) {
+ if (F.getCallingConv() == CallingConv::PTX_Kernel)
+ return true;
+
if (const auto X = findOneNVVMAnnotation(&F, "kernel"))
return (*X == 1);
- // There is no NVVM metadata, check the calling convention
- return F.getCallingConv() == CallingConv::PTX_Kernel;
+ return false;
}
MaybeAlign getAlign(const Function &F, unsigned Index) {
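The reordering can be modeled as follows (helper and parameter names ours): the PTX_Kernel calling convention now wins outright, and when neither the convention nor the metadata is present the function is not a kernel.

    #include <optional>

    // IsPtxKernelCC stands in for F.getCallingConv() == CallingConv::PTX_Kernel;
    // KernelMD stands in for findOneNVVMAnnotation(&F, "kernel").
    bool isKernelSketch(bool IsPtxKernelCC, std::optional<unsigned> KernelMD) {
      if (IsPtxKernelCC)
        return true;                     // calling convention checked first
      return KernelMD && *KernelMD == 1; // else defer to !nvvm.annotations
    }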
diff --git a/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 56525a1..0cd584c 100644
--- a/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -21,6 +21,7 @@
#include "NVPTX.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
@@ -219,7 +220,13 @@ bool NVVMReflect::runOnFunction(Function &F) {
return runNVVMReflect(F, SmVersion);
}
-NVVMReflectPass::NVVMReflectPass() : NVVMReflectPass(0) {}
+NVVMReflectPass::NVVMReflectPass() {
+ // Get the CPU string from the command line if not provided.
+ std::string MCPU = codegen::getMCPU();
+ StringRef SM = MCPU;
+ if (!SM.consume_front("sm_") || SM.consumeInteger(10, SmVersion))
+ SmVersion = 0;
+}
PreservedAnalyses NVVMReflectPass::run(Function &F,
FunctionAnalysisManager &AM) {
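A standalone sketch of the constructor's parsing (function name ours): "sm_90" yields 90, while an empty or non-"sm_"-prefixed CPU string yields 0, signaling that no usable architecture was given.

    #include "llvm/ADT/StringRef.h"

    unsigned parseSmVersion(llvm::StringRef MCPU) {
      unsigned SmVersion = 0;
      llvm::StringRef SM = MCPU; // e.g. the -mcpu codegen flag
      // consume_front() strips the prefix; consumeInteger() returns true on
      // failure, so any malformed string falls back to 0.
      if (!SM.consume_front("sm_") || SM.consumeInteger(10, SmVersion))
        SmVersion = 0;
      return SmVersion;
    }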
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 073857e..162d110 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -2230,10 +2230,6 @@ void PPCLinuxAsmPrinter::emitFunctionBodyEnd() {
void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
MCSymbol *GVSym) const {
-
- assert(MAI->hasVisibilityOnlyWithLinkage() &&
- "AIX's linkage directives take a visibility setting.");
-
MCSymbolAttr LinkageAttr = MCSA_Invalid;
switch (GV->getLinkage()) {
case GlobalValue::ExternalLinkage:
@@ -3251,9 +3247,15 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) {
bool PPCAIXAsmPrinter::doFinalization(Module &M) {
// Do streamer related finalization for DWARF.
- if (!MAI->usesDwarfFileAndLocDirectives() && hasDebugInfo())
- OutStreamer->doFinalizationAtSectionEnd(
- OutStreamer->getContext().getObjectFileInfo()->getTextSection());
+ if (hasDebugInfo()) {
+ // Emit section end. This is used to tell the debug line section where the
+ // end is for a text section if we don't use .loc to represent the debug
+ // line.
+ auto *Sec = OutContext.getObjectFileInfo()->getTextSection();
+ OutStreamer->switchSectionNoPrint(Sec);
+ MCSymbol *Sym = Sec->getEndSymbol(OutContext);
+ OutStreamer->emitLabel(Sym);
+ }
for (MCSymbol *Sym : ExtSymSDNodeSymbols)
OutStreamer->emitSymbolAttribute(Sym, MCSA_Extern);
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 2e0ee59..d1daf7c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -5473,10 +5473,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// generate secure plt code for TLS symbols.
getGlobalBaseReg();
} break;
- case PPCISD::CALL: {
- if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
- !TM.isPositionIndependent() || !Subtarget->isSecurePlt() ||
- !Subtarget->isTargetELF())
+ case PPCISD::CALL:
+ case PPCISD::CALL_RM: {
+ if (Subtarget->isPPC64() || !TM.isPositionIndependent() ||
+ !Subtarget->isSecurePlt() || !Subtarget->isTargetELF())
break;
SDValue Op = N->getOperand(1);
@@ -5489,8 +5489,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
if (ES->getTargetFlags() == PPCII::MO_PLT)
getGlobalBaseReg();
}
- }
- break;
+ } break;
case PPCISD::GlobalBaseReg:
ReplaceNode(N, getGlobalBaseReg());
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 44f6db5..fa45a7f 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -643,8 +643,8 @@ bool PPCInstrInfo::shouldReduceRegisterPressure(
};
// For now we only care about float and double type fma.
- unsigned VSSRCLimit = TRI->getRegPressureSetLimit(
- *MBB->getParent(), PPC::RegisterPressureSets::VSSRC);
+ unsigned VSSRCLimit =
+ RegClassInfo->getRegPressureSetLimit(PPC::RegisterPressureSets::VSSRC);
// Only reduce register pressure when pressure is high.
return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 9dcf2e9..2205c67 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -734,6 +734,16 @@ public:
VK == RISCVMCExpr::VK_RISCV_None;
}
+ bool isUImm5GT3() const {
+ if (!isImm())
+ return false;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ int64_t Imm;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isUInt<5>(Imm) && (Imm > 3) &&
+ VK == RISCVMCExpr::VK_RISCV_None;
+ }
+
bool isUImm8GE32() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
@@ -1520,6 +1530,8 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1);
case Match_InvalidUImm5NonZero:
return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 5) - 1);
+ case Match_InvalidUImm5GT3:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 4, (1 << 5) - 1);
case Match_InvalidUImm6:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 6) - 1);
case Match_InvalidUImm7:
@@ -1903,6 +1915,8 @@ ParseStatus RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) {
// Accept an immediate representing a named Sys Reg if it satisfies the
// required features.
for (auto &Reg : Range) {
+ if (Reg.IsAltName || Reg.IsDeprecatedName)
+ continue;
if (Reg.haveRequiredFeatures(STI->getFeatureBits()))
return RISCVOperand::createSysReg(Reg.Name, S, Imm);
}
@@ -1940,22 +1954,27 @@ ParseStatus RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) {
return ParseStatus::Failure;
const auto *SysReg = RISCVSysReg::lookupSysRegByName(Identifier);
- if (!SysReg)
- SysReg = RISCVSysReg::lookupSysRegByAltName(Identifier);
- if (!SysReg)
- if ((SysReg = RISCVSysReg::lookupSysRegByDeprecatedName(Identifier)))
- Warning(S, "'" + Identifier + "' is a deprecated alias for '" +
- SysReg->Name + "'");
-
- // Accept a named Sys Reg if the required features are present.
+
if (SysReg) {
+ if (SysReg->IsDeprecatedName) {
+ // Lookup the undeprecated name.
+ auto Range = RISCVSysReg::lookupSysRegByEncoding(SysReg->Encoding);
+ for (auto &Reg : Range) {
+ if (Reg.IsAltName || Reg.IsDeprecatedName)
+ continue;
+ Warning(S, "'" + Identifier + "' is a deprecated alias for '" +
+ Reg.Name + "'");
+ }
+ }
+
+ // Accept a named Sys Reg if the required features are present.
const auto &FeatureBits = getSTI().getFeatureBits();
if (!SysReg->haveRequiredFeatures(FeatureBits)) {
const auto *Feature = llvm::find_if(RISCVFeatureKV, [&](auto Feature) {
return SysReg->FeaturesRequired[Feature.Value];
});
auto ErrorMsg = std::string("system register '") + SysReg->Name + "' ";
- if (SysReg->isRV32Only && FeatureBits[RISCV::Feature64Bit]) {
+ if (SysReg->IsRV32Only && FeatureBits[RISCV::Feature64Bit]) {
ErrorMsg += "is RV32 only";
if (Feature != std::end(RISCVFeatureKV))
ErrorMsg += " and ";
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 4466164..98d3615 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -15,6 +15,7 @@ tablegen(LLVM RISCVGenRegisterBank.inc -gen-register-bank)
tablegen(LLVM RISCVGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM RISCVGenSearchableTables.inc -gen-searchable-tables)
tablegen(LLVM RISCVGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM RISCVGenExegesis.inc -gen-exegesis)
set(LLVM_TARGET_DEFINITIONS RISCVGISel.td)
tablegen(LLVM RISCVGenGlobalISel.inc -gen-global-isel)
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 9901719..a490910 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -692,6 +692,14 @@ DecodeStatus RISCVDisassembler::getInstruction32(MCInst &MI, uint64_t &Size,
"Qualcomm uC Conditional Select custom opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXqcilsm, DecoderTableXqcilsm32,
"Qualcomm uC Load Store Multiple custom opcode table");
+ TRY_TO_DECODE_FEATURE(
+ RISCV::FeatureVendorXqciac, DecoderTableXqciac32,
+ "Qualcomm uC Load-Store Address Calculation custom opcode table");
+ TRY_TO_DECODE_FEATURE(
+ RISCV::FeatureVendorXqcicli, DecoderTableXqcicli32,
+ "Qualcomm uC Conditional Load Immediate custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXqcicm, DecoderTableXqcicm32,
+ "Qualcomm uC Conditional Move custom opcode table");
TRY_TO_DECODE(true, DecoderTable32, "RISCV32 table");
return MCDisassembler::Fail;
@@ -718,6 +726,12 @@ DecodeStatus RISCVDisassembler::getInstruction16(MCInst &MI, uint64_t &Size,
TRY_TO_DECODE_FEATURE(
RISCV::FeatureStdExtZcmp, DecoderTableRVZcmp16,
"Zcmp table (16-bit Push/Pop & Double Move Instructions)");
+ TRY_TO_DECODE_FEATURE(
+ RISCV::FeatureVendorXqciac, DecoderTableXqciac16,
+ "Qualcomm uC Load-Store Address Calculation custom 16bit opcode table");
+ TRY_TO_DECODE_FEATURE(
+ RISCV::FeatureVendorXqcicm, DecoderTableXqcicm16,
+ "Qualcomm uC Conditional Move custom 16bit opcode table");
TRY_TO_DECODE_AND_ADD_SP(STI.hasFeature(RISCV::FeatureVendorXwchc),
DecoderTableXwchc16,
"WCH QingKe XW custom opcode table");
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index ef85057..3f1539d 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -80,7 +80,6 @@ private:
bool selectFPCompare(MachineInstr &MI, MachineIRBuilder &MIB) const;
void emitFence(AtomicOrdering FenceOrdering, SyncScope::ID FenceSSID,
MachineIRBuilder &MIB) const;
- bool selectMergeValues(MachineInstr &MI, MachineIRBuilder &MIB) const;
bool selectUnmergeValues(MachineInstr &MI, MachineIRBuilder &MIB) const;
ComplexRendererFns selectShiftMask(MachineOperand &Root,
@@ -732,8 +731,6 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) {
}
case TargetOpcode::G_IMPLICIT_DEF:
return selectImplicitDef(MI, MIB);
- case TargetOpcode::G_MERGE_VALUES:
- return selectMergeValues(MI, MIB);
case TargetOpcode::G_UNMERGE_VALUES:
return selectUnmergeValues(MI, MIB);
default:
@@ -741,26 +738,13 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) {
}
}
-bool RISCVInstructionSelector::selectMergeValues(MachineInstr &MI,
- MachineIRBuilder &MIB) const {
- assert(MI.getOpcode() == TargetOpcode::G_MERGE_VALUES);
-
- // Build a F64 Pair from operands
- if (MI.getNumOperands() != 3)
- return false;
- Register Dst = MI.getOperand(0).getReg();
- Register Lo = MI.getOperand(1).getReg();
- Register Hi = MI.getOperand(2).getReg();
- if (!isRegInFprb(Dst) || !isRegInGprb(Lo) || !isRegInGprb(Hi))
- return false;
- MI.setDesc(TII.get(RISCV::BuildPairF64Pseudo));
- return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
-}
-
bool RISCVInstructionSelector::selectUnmergeValues(
MachineInstr &MI, MachineIRBuilder &MIB) const {
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
+ if (!Subtarget->hasStdExtZfa())
+ return false;
+
// Split F64 Src into two s32 parts
if (MI.getNumOperands() != 3)
return false;
@@ -769,8 +753,17 @@ bool RISCVInstructionSelector::selectUnmergeValues(
Register Hi = MI.getOperand(1).getReg();
if (!isRegInFprb(Src) || !isRegInGprb(Lo) || !isRegInGprb(Hi))
return false;
- MI.setDesc(TII.get(RISCV::SplitF64Pseudo));
- return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+
+ MachineInstr *ExtractLo = MIB.buildInstr(RISCV::FMV_X_W_FPR64, {Lo}, {Src});
+ if (!constrainSelectedInstRegOperands(*ExtractLo, TII, TRI, RBI))
+ return false;
+
+ MachineInstr *ExtractHi = MIB.buildInstr(RISCV::FMVH_X_D, {Hi}, {Src});
+ if (!constrainSelectedInstRegOperands(*ExtractHi, TII, TRI, RBI))
+ return false;
+
+ MI.eraseFromParent();
+ return true;
}
bool RISCVInstructionSelector::replacePtrWithInt(MachineOperand &Op,
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 8284737..6f06459 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -132,7 +133,14 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
auto PtrVecTys = {nxv1p0, nxv2p0, nxv4p0, nxv8p0, nxv16p0};
- getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
+ getActionDefinitionsBuilder({G_ADD, G_SUB})
+ .legalFor({sXLen})
+ .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
+ .customFor(ST.is64Bit(), {s32})
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, sXLen, sXLen);
+
+ getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
.legalFor({sXLen})
.legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
.widenScalarToNextPow2(0)
@@ -1330,6 +1338,24 @@ bool RISCVLegalizerInfo::legalizeCustom(
return true;
return Helper.lowerConstant(MI);
}
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_ADD: {
+ Helper.Observer.changingInstr(MI);
+ Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
+ Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
+
+ Register DstALU = MRI.createGenericVirtualRegister(sXLen);
+
+ MachineOperand &MO = MI.getOperand(0);
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+ auto DstSext = MIRBuilder.buildSExtInReg(sXLen, DstALU, 32);
+
+ MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {MO}, {DstSext});
+ MO.setReg(DstALU);
+
+ Helper.Observer.changedInstr(MI);
+ return true;
+ }
case TargetOpcode::G_SEXT_INREG: {
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
int64_t SizeInBits = MI.getOperand(2).getImm();
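A scalar model of the sequence the legalizer now emits for s32 G_ADD on RV64 (function name ours): any-extend both sources to XLen, add at XLen width, then sign-extend in-register from bit 32 and truncate, which is exactly the value ADDW produces.

    #include <cstdint>

    int32_t legalizedAdd32(int32_t A, int32_t B) {
      // G_ANYEXT leaves the high bits unspecified; zero-extension is one
      // valid choice for modeling it.
      int64_t Wide = (int64_t)(uint32_t)A + (int64_t)(uint32_t)B;
      int64_t Sext = (int64_t)(int32_t)Wide; // G_SEXT_INREG to 32 bits
      return (int32_t)Sext;                  // G_TRUNC back to s32
    }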
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index eab4a5e..0cb1ef0 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -38,9 +38,12 @@ std::optional<MCFixupKind> RISCVAsmBackend::getFixupKind(StringRef Name) const {
if (STI.getTargetTriple().isOSBinFormatELF()) {
unsigned Type;
Type = llvm::StringSwitch<unsigned>(Name)
-#define ELF_RELOC(X, Y) .Case(#X, Y)
+#define ELF_RELOC(NAME, ID) .Case(#NAME, ID)
#include "llvm/BinaryFormat/ELFRelocs/RISCV.def"
#undef ELF_RELOC
+#define ELF_RISCV_NONSTANDARD_RELOC(_VENDOR, NAME, ID) .Case(#NAME, ID)
+#include "llvm/BinaryFormat/ELFRelocs/RISCV_nonstandard.def"
+#undef ELF_RISCV_NONSTANDARD_RELOC
.Case("BFD_RELOC_NONE", ELF::R_RISCV_NONE)
.Case("BFD_RELOC_32", ELF::R_RISCV_32)
.Case("BFD_RELOC_64", ELF::R_RISCV_64)
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index b9f4db0..7048e40 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -302,6 +302,7 @@ enum OperandType : unsigned {
OPERAND_UIMM4,
OPERAND_UIMM5,
OPERAND_UIMM5_NONZERO,
+ OPERAND_UIMM5_GT3,
OPERAND_UIMM5_LSB0,
OPERAND_UIMM6,
OPERAND_UIMM6_LSB0,
@@ -453,8 +454,6 @@ int getLoadFPImm(APFloat FPImm);
namespace RISCVSysReg {
struct SysReg {
const char Name[32];
- const char AltName[32];
- const char DeprecatedName[32];
unsigned Encoding;
// FIXME: add these additional fields when needed.
// Privilege Access: Read, Write, Read-Only.
@@ -466,11 +465,13 @@ struct SysReg {
// Register number without the privilege bits.
// unsigned Number;
FeatureBitset FeaturesRequired;
- bool isRV32Only;
+ bool IsRV32Only;
+ bool IsAltName;
+ bool IsDeprecatedName;
bool haveRequiredFeatures(const FeatureBitset &ActiveFeatures) const {
// Not in 32-bit mode.
- if (isRV32Only && ActiveFeatures[RISCV::Feature64Bit])
+ if (IsRV32Only && ActiveFeatures[RISCV::Feature64Bit])
return false;
// No required feature associated with the system register.
if (FeaturesRequired.none())
@@ -479,6 +480,7 @@ struct SysReg {
}
};
+#define GET_SysRegEncodings_DECL
#define GET_SysRegsList_DECL
#include "RISCVGenSearchableTables.inc"
} // end namespace RISCVSysReg
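With the alias and deprecated names folded into per-entry flags, name resolution goes through the encoding-keyed range instead; a toy model (table contents ours, using the real stval/sbadaddr pair at encoding 0x143):

    #include <cstdio>

    struct SysRegSketch {
      const char *Name;
      unsigned Encoding;
      bool IsAltName, IsDeprecatedName;
    };

    // Stand-in for the tablegen-generated list.
    static const SysRegSketch Regs[] = {
        {"stval", 0x143, false, false},
        {"sbadaddr", 0x143, false, true}, // deprecated alias of stval
    };

    // Canonical-name lookup by encoding: skip aliases and deprecated names,
    // the same filtering the parser loop above applies.
    const char *canonicalName(unsigned Enc) {
      for (const auto &R : Regs)
        if (R.Encoding == Enc && !R.IsAltName && !R.IsDeprecatedName)
          return R.Name;
      return nullptr;
    }

    int main() { printf("%s\n", canonicalName(0x143)); } // prints "stval"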
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index d36c0d7..d525471 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -121,6 +121,8 @@ void RISCVInstPrinter::printCSRSystemRegister(const MCInst *MI, unsigned OpNo,
unsigned Imm = MI->getOperand(OpNo).getImm();
auto Range = RISCVSysReg::lookupSysRegByEncoding(Imm);
for (auto &Reg : Range) {
+ if (Reg.IsAltName || Reg.IsDeprecatedName)
+ continue;
if (Reg.haveRequiredFeatures(STI.getFeatureBits())) {
markup(O, Markup::Register) << Reg.Name;
return;
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 9631241..4e0c64a 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -64,6 +64,12 @@ include "RISCVSchedXiangShanNanHu.td"
include "RISCVProcessors.td"
//===----------------------------------------------------------------------===//
+// Pfm Counters
+//===----------------------------------------------------------------------===//
+
+include "RISCVPfmCounters.td"
+
+//===----------------------------------------------------------------------===//
// Define the RISC-V target.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVCombine.td b/llvm/lib/Target/RISCV/RISCVCombine.td
index 030613a..995dd0c 100644
--- a/llvm/lib/Target/RISCV/RISCVCombine.td
+++ b/llvm/lib/Target/RISCV/RISCVCombine.td
@@ -25,5 +25,5 @@ def RISCVPostLegalizerCombiner
: GICombiner<"RISCVPostLegalizerCombinerImpl",
[sub_to_add, combines_for_extload, redundant_and,
identity_combines, shift_immed_chain,
- commute_constant_to_rhs]> {
+ commute_constant_to_rhs, simplify_neg_minmax]> {
}
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index dfc56588..01bc5387 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -844,6 +844,10 @@ def HasStdExtH : Predicate<"Subtarget->hasStdExtH()">,
// Supervisor extensions
+def FeatureStdExtSdext : RISCVExperimentalExtension<1, 0, "External debugger">;
+
+def FeatureStdExtSdtrig : RISCVExperimentalExtension<1, 0, "Debugger triggers">;
+
def FeatureStdExtShgatpa
: RISCVExtension<1, 0,
"SvNNx4 mode supported for all modes supported by satp, as well as Bare">;
@@ -1274,6 +1278,30 @@ def HasVendorXqcilsm
AssemblerPredicate<(all_of FeatureVendorXqcilsm),
"'Xqcilsm' (Qualcomm uC Load Store Multiple Extension)">;
+def FeatureVendorXqciac
+ : RISCVExperimentalExtension<0, 2, "Qualcomm uC Load-Store Address Calculation Extension",
+ [FeatureStdExtZca]>;
+def HasVendorXqciac
+ : Predicate<"Subtarget->hasVendorXqciac()">,
+ AssemblerPredicate<(all_of FeatureVendorXqciac),
+ "'Xqciac' (Qualcomm uC Load-Store Address Calculation Extension)">;
+
+def FeatureVendorXqcicli
+ : RISCVExperimentalExtension<0, 2,
+ "Qualcomm uC Conditional Load Immediate Extension">;
+def HasVendorXqcicli
+ : Predicate<"Subtarget->hasVendorXqcicli()">,
+ AssemblerPredicate<(all_of FeatureVendorXqcicli),
+ "'Xqcicli' (Qualcomm uC Conditional Load Immediate Extension)">;
+
+def FeatureVendorXqcicm
+ : RISCVExperimentalExtension<0, 2, "Qualcomm uC Conditional Move Extension",
+ [FeatureStdExtZca]>;
+def HasVendorXqcicm
+ : Predicate<"Subtarget->hasVendorXqcicm()">,
+ AssemblerPredicate<(all_of FeatureVendorXqcicm),
+ "'Xqcicm' (Qualcomm uC Conditional Move Extension)">;
+
//===----------------------------------------------------------------------===//
// LLVM specific features and extensions
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index cda64ae..6c58989 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5104,6 +5104,7 @@ static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
SDValue V1 = SVN->getOperand(0);
SDValue V2 = SVN->getOperand(1);
ArrayRef<int> Mask = SVN->getMask();
+ unsigned NumElts = VT.getVectorNumElements();
// If we don't know exact data layout, not much we can do. If this
// is already m1 or smaller, no point in splitting further.
@@ -5120,70 +5121,58 @@ static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
MVT ElemVT = VT.getVectorElementType();
unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
+ unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
+
+ SmallVector<std::pair<int, SmallVector<int>>>
+ OutMasks(VRegsPerSrc, {-1, {}});
+
+ // Check if our mask can be done as a 1-to-1 mapping from source
+ // to destination registers in the group without needing to
+ // write each destination more than once.
+ for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
+ int DstVecIdx = DstIdx / ElemsPerVReg;
+ int DstSubIdx = DstIdx % ElemsPerVReg;
+ int SrcIdx = Mask[DstIdx];
+ if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
+ continue;
+ int SrcVecIdx = SrcIdx / ElemsPerVReg;
+ int SrcSubIdx = SrcIdx % ElemsPerVReg;
+ if (OutMasks[DstVecIdx].first == -1)
+ OutMasks[DstVecIdx].first = SrcVecIdx;
+ if (OutMasks[DstVecIdx].first != SrcVecIdx)
+ // Note: This case could easily be handled by keeping track of a chain
+ // of source values and generating two element shuffles below. This is
+ // less an implementation question, and more a profitability one.
+ return SDValue();
+
+ OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
+ OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
+ }
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
assert(M1VT == getLMUL1VT(M1VT));
unsigned NumOpElts = M1VT.getVectorMinNumElements();
- unsigned NormalizedVF = ContainerVT.getVectorMinNumElements();
- unsigned NumOfSrcRegs = NormalizedVF / NumOpElts;
- unsigned NumOfDestRegs = NormalizedVF / NumOpElts;
+ SDValue Vec = DAG.getUNDEF(ContainerVT);
// The following semantically builds up a fixed length concat_vector
// of the component shuffle_vectors. We eagerly lower to scalable here
// to avoid DAG combining it back to a large shuffle_vector again.
V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
- SmallVector<SDValue> SubRegs(NumOfDestRegs);
- unsigned RegCnt = 0;
- unsigned PrevCnt = 0;
- processShuffleMasks(
- Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
- [&]() {
- PrevCnt = RegCnt;
- ++RegCnt;
- },
- [&, &DAG = DAG](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx,
- unsigned DstVecIdx) {
- SDValue SrcVec = SrcVecIdx >= NumOfSrcRegs ? V2 : V1;
- unsigned ExtractIdx = (SrcVecIdx % NumOfSrcRegs) * NumOpElts;
- SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
- DAG.getVectorIdxConstant(ExtractIdx, DL));
- SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
- SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
- SubRegs[RegCnt] = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
- PrevCnt = RegCnt;
- ++RegCnt;
- },
- [&, &DAG = DAG](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2) {
- if (PrevCnt + 1 == RegCnt)
- ++RegCnt;
- SDValue SubVec1 = SubRegs[PrevCnt + 1];
- if (!SubVec1) {
- SDValue SrcVec = Idx1 >= NumOfSrcRegs ? V2 : V1;
- unsigned ExtractIdx = (Idx1 % NumOfSrcRegs) * NumOpElts;
- SubVec1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
- DAG.getVectorIdxConstant(ExtractIdx, DL));
- }
- SubVec1 = convertFromScalableVector(OneRegVT, SubVec1, DAG, Subtarget);
- SDValue SrcVec = Idx2 >= NumOfSrcRegs ? V2 : V1;
- unsigned ExtractIdx = (Idx2 % NumOfSrcRegs) * NumOpElts;
- SDValue SubVec2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
- DAG.getVectorIdxConstant(ExtractIdx, DL));
- SubVec2 = convertFromScalableVector(OneRegVT, SubVec2, DAG, Subtarget);
- SubVec1 =
- DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, SrcSubMask);
- SubVec1 = convertToScalableVector(M1VT, SubVec1, DAG, Subtarget);
- SubRegs[PrevCnt + 1] = SubVec1;
- });
- assert(RegCnt == NumOfDestRegs && "Whole vector must be processed");
- SDValue Vec = DAG.getUNDEF(ContainerVT);
- for (auto [I, V] : enumerate(SubRegs)) {
- if (!V)
+ for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
+ auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
+ if (SrcVecIdx == -1)
continue;
- unsigned InsertIdx = I * NumOpElts;
-
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, V,
+ unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
+ SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
+ SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
+ DAG.getVectorIdxConstant(ExtractIdx, DL));
+ SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
+ SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
+ SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
+ unsigned InsertIdx = DstVecIdx * NumOpElts;
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
DAG.getVectorIdxConstant(InsertIdx, DL));
}
return convertFromScalableVector(VT, Vec, DAG, Subtarget);
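A self-contained worked example of the reinstated 1-to-1 mapping check (sizes made up): with 16 elements split across two vregs of 8, a rotate-by-one-vreg mask assigns each destination vreg exactly one source vreg, so the fast path applies.

    #include <cstdio>
    #include <vector>

    int main() {
      const unsigned ElemsPerVReg = 8, NumElts = 16;
      std::vector<int> Mask(NumElts);
      for (unsigned I = 0; I < NumElts; ++I)
        Mask[I] = (I + 8) % 16; // rotate the two-vreg group by one vreg
      std::vector<int> SrcOfDst(NumElts / ElemsPerVReg, -1);
      for (unsigned DstIdx = 0; DstIdx < Mask.size(); ++DstIdx) {
        int SrcVecIdx = Mask[DstIdx] / (int)ElemsPerVReg;
        int &Slot = SrcOfDst[DstIdx / ElemsPerVReg];
        if (Slot == -1)
          Slot = SrcVecIdx;        // first element claims the source vreg
        else if (Slot != SrcVecIdx)
          return 1;                // two sources for one dest: bail out
      }
      printf("dst0<-src%d dst1<-src%d\n", SrcOfDst[0], SrcOfDst[1]);
      return 0; // prints dst0<-src1 dst1<-src0
    }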
@@ -10165,7 +10154,10 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
case ISD::VP_REDUCE_AND: {
// vcpop ~x == 0
SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
- Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
+ if (IsVP || VecVT.isFixedLengthVector())
+ Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
+ else
+ Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
CC = ISD::SETEQ;
break;
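The identity behind the vcpop form: an AND across mask bits is 1 exactly when the complement of the mask has no active bits set. A scalar model over an 8-bit mask (names ours):

    #include <cstdint>

    // reduce-AND(Mask[0..VL-1])  ==  (popcount(~Mask & active) == 0)
    bool reduceAndViaCpop(uint8_t Mask, unsigned VL) {
      uint8_t Active = VL >= 8 ? 0xFF : (uint8_t)((1u << VL) - 1);
      return (uint8_t)(~Mask & Active) == 0;
    }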
@@ -12674,8 +12666,7 @@ SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
const MVT XLenVT = Subtarget.getXLenVT();
SDLoc DL(Op);
SDValue Chain = Op->getOperand(0);
- SDValue SysRegNo = DAG.getTargetConstant(
- RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
+ SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
@@ -12706,8 +12697,7 @@ SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
SDLoc DL(Op);
SDValue Chain = Op->getOperand(0);
SDValue RMValue = Op->getOperand(1);
- SDValue SysRegNo = DAG.getTargetConstant(
- RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
+ SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
// Encoding used for rounding mode in RISC-V differs from that used in
// FLT_ROUNDS. To convert it the C rounding mode is used as an index in
@@ -12910,15 +12900,11 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
SDValue LoCounter, HiCounter;
MVT XLenVT = Subtarget.getXLenVT();
if (N->getOpcode() == ISD::READCYCLECOUNTER) {
- LoCounter = DAG.getTargetConstant(
- RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
- HiCounter = DAG.getTargetConstant(
- RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
+ LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
+ HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
} else {
- LoCounter = DAG.getTargetConstant(
- RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
- HiCounter = DAG.getTargetConstant(
- RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
+ LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
+ HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
}
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
@@ -18397,6 +18383,15 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
+
+ // Bail if we might break a sh{1,2,3}add pattern.
+ if (Subtarget.hasStdExtZba() && C2 && C2->getZExtValue() >= 1 &&
+ C2->getZExtValue() <= 3 && N->hasOneUse() &&
+ N->user_begin()->getOpcode() == ISD::ADD &&
+ !isUsedByLdSt(*N->user_begin(), nullptr) &&
+ !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
+ return false;
+
if (C1 && C2) {
const APInt &C1Int = C1->getAPIntValue();
APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
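The pattern the early return protects: with Zba, (Y + (X << C)) for C in {1,2,3} selects to a single shNadd, so hoisting the constant through the shift would cost an extra instruction. In C terms (function name ours):

    #include <cstdint>

    // One Zba op: sh2add rd, rs1, rs2 computes (rs1 << 2) + rs2.
    uint64_t sh2addModel(uint64_t X, uint64_t Y) { return (X << 2) + Y; }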
@@ -20278,13 +20273,11 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
for (auto &Reg : RegsToPass)
Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
- if (!IsTailCall) {
- // Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
- assert(Mask && "Missing call preserved mask for calling convention");
- Ops.push_back(DAG.getRegisterMask(Mask));
- }
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
// Glue the call to the argument copies, if any.
if (Glue.getNode())
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 7598583..1fd130d 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -627,7 +627,7 @@ public:
return MI;
}
- void setAVL(VSETVLIInfo Info) {
+ void setAVL(const VSETVLIInfo &Info) {
assert(Info.isValid());
if (Info.isUnknown())
setUnknown();
@@ -1223,7 +1223,8 @@ bool RISCVInsertVSETVLI::needVSETVLI(const DemandedFields &Used,
// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
-static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
+static VSETVLIInfo adjustIncoming(const VSETVLIInfo &PrevInfo,
+ const VSETVLIInfo &NewInfo,
DemandedFields &Demanded) {
VSETVLIInfo Info = NewInfo;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index ae969bff8..349bc36 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -23,7 +23,9 @@ def SDT_RISCVSplitF64 : SDTypeProfile<2, 1, [SDTCisVT<0, i32>,
SDTCisVT<2, f64>]>;
def RISCVBuildPairF64 : SDNode<"RISCVISD::BuildPairF64", SDT_RISCVBuildPairF64>;
+def : GINodeEquiv<G_MERGE_VALUES, RISCVBuildPairF64>;
def RISCVSplitF64 : SDNode<"RISCVISD::SplitF64", SDT_RISCVSplitF64>;
+def : GINodeEquiv<G_UNMERGE_VALUES, RISCVSplitF64>;
def AddrRegImmINX : ComplexPattern<iPTR, 2, "SelectAddrRegImmRV32Zdinx">;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
index 99186ec..942ced8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
@@ -536,27 +536,23 @@ multiclass VPatTernaryVMAQA_VV_VX<string intrinsic, string instruction,
//===----------------------------------------------------------------------===//
let Predicates = [HasVendorXTHeadBa] in {
-def : Pat<(add (XLenVT GPR:$rs1), (shl GPR:$rs2, uimm2:$uimm2)),
+def : Pat<(add_like_non_imm12 (shl GPR:$rs2, uimm2:$uimm2), (XLenVT GPR:$rs1)),
+ (TH_ADDSL GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>;
+def : Pat<(XLenVT (riscv_shl_add GPR:$rs2, uimm2:$uimm2, GPR:$rs1)),
(TH_ADDSL GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>;
-def : Pat<(XLenVT (riscv_shl_add GPR:$rs1, uimm2:$uimm2, GPR:$rs2)),
- (TH_ADDSL GPR:$rs2, GPR:$rs1, uimm2:$uimm2)>;
// Reuse complex patterns from StdExtZba
-def : Pat<(add_non_imm12 sh1add_op:$rs1, (XLenVT GPR:$rs2)),
- (TH_ADDSL GPR:$rs2, sh1add_op:$rs1, 1)>;
-def : Pat<(add_non_imm12 sh2add_op:$rs1, (XLenVT GPR:$rs2)),
- (TH_ADDSL GPR:$rs2, sh2add_op:$rs1, 2)>;
-def : Pat<(add_non_imm12 sh3add_op:$rs1, (XLenVT GPR:$rs2)),
- (TH_ADDSL GPR:$rs2, sh3add_op:$rs1, 3)>;
-
-def : Pat<(add (XLenVT GPR:$r), CSImm12MulBy4:$i),
- (TH_ADDSL GPR:$r, (XLenVT (ADDI (XLenVT X0), (SimmShiftRightBy2XForm CSImm12MulBy4:$i))), 2)>;
-def : Pat<(add (XLenVT GPR:$r), CSImm12MulBy8:$i),
- (TH_ADDSL GPR:$r, (XLenVT (ADDI (XLenVT X0), (SimmShiftRightBy3XForm CSImm12MulBy8:$i))), 3)>;
-
-def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 200)),
- (SLLI (XLenVT (TH_ADDSL (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 2)),
- (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 2)), 2)), 3)>;
+def : Pat<(add_like_non_imm12 sh1add_op:$rs2, (XLenVT GPR:$rs1)),
+ (TH_ADDSL GPR:$rs1, sh1add_op:$rs2, 1)>;
+def : Pat<(add_like_non_imm12 sh2add_op:$rs2, (XLenVT GPR:$rs1)),
+ (TH_ADDSL GPR:$rs1, sh2add_op:$rs2, 2)>;
+def : Pat<(add_like_non_imm12 sh3add_op:$rs2, (XLenVT GPR:$rs1)),
+ (TH_ADDSL GPR:$rs1, sh3add_op:$rs2, 3)>;
+
+def : Pat<(add_like (XLenVT GPR:$r), CSImm12MulBy4:$i),
+ (TH_ADDSL GPR:$r, (XLenVT (ADDI (XLenVT X0), CSImm12MulBy4:$i)), 2)>;
+def : Pat<(add_like (XLenVT GPR:$r), CSImm12MulBy8:$i),
+ (TH_ADDSL GPR:$r, (XLenVT (ADDI (XLenVT X0), CSImm12MulBy8:$i)), 3)>;
} // Predicates = [HasVendorXTHeadBa]
let Predicates = [HasVendorXTHeadBb] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index 05b5591..6f15646 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -21,6 +21,13 @@ def uimm5nonzero : RISCVOp<XLenVT>,
let OperandType = "OPERAND_UIMM5_NONZERO";
}
+def uimm5gt3 : RISCVOp<XLenVT>, ImmLeaf<XLenVT,
+ [{return (Imm > 3) && isUInt<5>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<5, "GT3">;
+ let DecoderMethod = "decodeUImmOperand<5>";
+ let OperandType = "OPERAND_UIMM5_GT3";
+}
+
def uimm11 : RISCVUImmLeafOp<11>;
//===----------------------------------------------------------------------===//
@@ -132,6 +139,33 @@ class QCIStoreMultiple<bits<2> funct2, DAGOperand InTyRs2, string opcodestr>
let Inst{31-25} = {funct2, imm{6-2}};
}
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class QCILICC<bits<3> funct3, bits<2> funct2, DAGOperand InTyRs2, string opcodestr>
+ : RVInstRBase<funct3, OPC_CUSTOM_2, (outs GPRNoX0:$rd_wb),
+ (ins GPRNoX0:$rd, GPRNoX0:$rs1, InTyRs2:$rs2, simm5:$simm),
+ opcodestr, "$rd, $rs1, $rs2, $simm"> {
+ let Constraints = "$rd = $rd_wb";
+ bits<5> simm;
+
+ let Inst{31-25} = {simm, funct2};
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class QCIMVCC<bits<3> funct3, string opcodestr>
+ : RVInstR4<0b00, funct3, OPC_CUSTOM_2, (outs GPRNoX0:$rd),
+ (ins GPRNoX0:$rs1, GPRNoX0:$rs2, GPRNoX0:$rs3),
+ opcodestr, "$rd, $rs1, $rs2, $rs3">;
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class QCIMVCCI<bits<3> funct3, string opcodestr, DAGOperand immType>
+ : RVInstR4<0b10, funct3, OPC_CUSTOM_2, (outs GPRNoX0:$rd),
+ (ins GPRNoX0:$rs1, immType:$imm, GPRNoX0:$rs3),
+ opcodestr, "$rd, $rs1, $imm, $rs3"> {
+ bits<5> imm;
+
+ let rs2 = imm;
+}
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -184,6 +218,37 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
} // Predicates = [HasVendorXqcia, IsRV32], DecoderNamespace = "Xqcia"
+let Predicates = [HasVendorXqciac, IsRV32], DecoderNamespace = "Xqciac" in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+ def QC_C_MULADDI : RVInst16CL<0b001, 0b10, (outs GPRC:$rd_wb),
+ (ins GPRC:$rd, GPRC:$rs1, uimm5:$uimm),
+ "qc.c.muladdi", "$rd, $rs1, $uimm"> {
+ let Constraints = "$rd = $rd_wb";
+ bits<5> uimm;
+
+ let Inst{12-10} = uimm{3-1};
+ let Inst{6} = uimm{0};
+ let Inst{5} = uimm{4};
+ }
+
+ def QC_MULADDI : RVInstI<0b110, OPC_CUSTOM_0, (outs GPRNoX0:$rd_wb),
+ (ins GPRNoX0:$rd, GPRNoX0:$rs1, simm12:$imm12),
+ "qc.muladdi", "$rd, $rs1, $imm12"> {
+ let Constraints = "$rd = $rd_wb";
+ }
+
+ def QC_SHLADD : RVInstRBase<0b011, OPC_CUSTOM_0, (outs GPRNoX0:$rd),
+ (ins GPRNoX0:$rs1, GPRNoX0:$rs2, uimm5gt3:$shamt),
+ "qc.shladd", "$rd, $rs1, $rs2, $shamt"> {
+ bits<5> shamt;
+
+ let Inst{31-30} = 0b01;
+ let Inst{29-25} = shamt;
+ }
+
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
+} // Predicates = [HasVendorXqciac, IsRV32], DecoderNamespace = "Xqciac"
+
let Predicates = [HasVendorXqcics, IsRV32], DecoderNamespace = "Xqcics" in {
def QC_SELECTIIEQ : QCISELECTIICC <0b010, "qc.selectiieq">;
def QC_SELECTIINE : QCISELECTIICC <0b011, "qc.selectiine">;
@@ -205,6 +270,48 @@ let Predicates = [HasVendorXqcilsm, IsRV32], DecoderNamespace = "Xqcilsm" in {
def QC_LWMI : QCILoadMultiple<0b01, uimm5nonzero, "qc.lwmi">;
} // Predicates = [HasVendorXqcilsm, IsRV32], DecoderNamespace = "Xqcilsm"
+let Predicates = [HasVendorXqcicli, IsRV32], DecoderNamespace = "Xqcicli" in {
+ def QC_LIEQ : QCILICC<0b000, 0b01, GPRNoX0, "qc.lieq">;
+ def QC_LINE : QCILICC<0b001, 0b01, GPRNoX0, "qc.line">;
+ def QC_LILT : QCILICC<0b100, 0b01, GPRNoX0, "qc.lilt">;
+ def QC_LIGE : QCILICC<0b101, 0b01, GPRNoX0, "qc.lige">;
+ def QC_LILTU : QCILICC<0b110, 0b01, GPRNoX0, "qc.liltu">;
+ def QC_LIGEU : QCILICC<0b111, 0b01, GPRNoX0, "qc.ligeu">;
+
+ def QC_LIEQI : QCILICC<0b000, 0b11, simm5, "qc.lieqi">;
+ def QC_LINEI : QCILICC<0b001, 0b11, simm5, "qc.linei">;
+ def QC_LILTI : QCILICC<0b100, 0b11, simm5, "qc.lilti">;
+ def QC_LIGEI : QCILICC<0b101, 0b11, simm5, "qc.ligei">;
+ def QC_LILTUI : QCILICC<0b110, 0b11, uimm5, "qc.liltui">;
+ def QC_LIGEUI : QCILICC<0b111, 0b11, uimm5, "qc.ligeui">;
+} // Predicates = [HasVendorXqcicli, IsRV32], DecoderNamespace = "Xqcicli"
+
+let Predicates = [HasVendorXqcicm, IsRV32], DecoderNamespace = "Xqcicm" in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+ def QC_C_MVEQZ : RVInst16CL<0b101, 0b10, (outs GPRC:$rd_wb),
+ (ins GPRC:$rd, GPRC:$rs1),
+ "qc.c.mveqz", "$rd, $rs1"> {
+ let Constraints = "$rd = $rd_wb";
+
+ let Inst{12-10} = 0b011;
+ let Inst{6-5} = 0b00;
+ }
+
+ def QC_MVEQ : QCIMVCC<0b000, "qc.mveq">;
+ def QC_MVNE : QCIMVCC<0b001, "qc.mvne">;
+ def QC_MVLT : QCIMVCC<0b100, "qc.mvlt">;
+ def QC_MVGE : QCIMVCC<0b101, "qc.mvge">;
+ def QC_MVLTU : QCIMVCC<0b110, "qc.mvltu">;
+ def QC_MVGEU : QCIMVCC<0b111, "qc.mvgeu">;
+
+ def QC_MVEQI : QCIMVCCI<0b000, "qc.mveqi", simm5>;
+ def QC_MVNEI : QCIMVCCI<0b001, "qc.mvnei", simm5>;
+ def QC_MVLTI : QCIMVCCI<0b100, "qc.mvlti", simm5>;
+ def QC_MVGEI : QCIMVCCI<0b101, "qc.mvgei", simm5>;
+ def QC_MVLTUI : QCIMVCCI<0b110, "qc.mvltui", uimm5>;
+ def QC_MVGEUI : QCIMVCCI<0b111, "qc.mvgeui", uimm5>;
+} // Predicates = [HasVendorXqcicm, IsRV32], DecoderNamespace = "Xqcicm"
+
//===----------------------------------------------------------------------===//
// Aliases
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVPfmCounters.td b/llvm/lib/Target/RISCV/RISCVPfmCounters.td
new file mode 100644
index 0000000..013e789
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVPfmCounters.td
@@ -0,0 +1,18 @@
+//===---- RISCVPfmCounters.td - RISC-V Hardware Counters ---*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the available hardware counters for RISC-V.
+//
+//===----------------------------------------------------------------------===//
+
+def CpuCyclesPfmCounter : PfmCounter<"CYCLES">;
+
+def DefaultPfmCounters : ProcPfmCounters {
+ let CycleCounter = CpuCyclesPfmCounter;
+}
+def : PfmCountersDefaultBinding<DefaultPfmCounters>;
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 61c7c21..6dfed7dd 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -321,6 +321,25 @@ def SIFIVE_P470 : RISCVProcessorModel<"sifive-p470", SiFiveP400Model,
[TuneNoSinkSplatOperands,
TuneVXRMPipelineFlush])>;
+defvar SiFiveP500TuneFeatures = [TuneNoDefaultUnroll,
+ TuneConditionalCompressedMoveFusion,
+ TuneLUIADDIFusion,
+ TuneAUIPCADDIFusion,
+ TunePostRAScheduler];
+
+def SIFIVE_P550 : RISCVProcessorModel<"sifive-p550", NoSchedModel,
+ [Feature64Bit,
+ FeatureStdExtI,
+ FeatureStdExtZifencei,
+ FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtF,
+ FeatureStdExtD,
+ FeatureStdExtC,
+ FeatureStdExtZba,
+ FeatureStdExtZbb],
+ SiFiveP500TuneFeatures>;
+
def SIFIVE_P670 : RISCVProcessorModel<"sifive-p670", SiFiveP600Model,
!listconcat(RVA22U64Features,
[FeatureStdExtV,
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
index a86c255..396cbe2c 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
@@ -182,7 +182,7 @@ def P400WriteCMOV : SchedWriteRes<[SiFiveP400Branch, SiFiveP400IEXQ1]> {
}
def : InstRW<[P400WriteCMOV], (instrs PseudoCCMOVGPRNoX0)>;
-let Latency = 3 in {
+let Latency = 2 in {
// Integer multiplication
def : WriteRes<WriteIMul, [SiFiveP400MulDiv]>;
def : WriteRes<WriteIMul32, [SiFiveP400MulDiv]>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td
index 7946a74..ceaeb85 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -237,6 +237,7 @@ def : ReadAdvance<ReadFCvtF16ToI32, 0>;
def : ReadAdvance<ReadFDiv16, 0>;
def : ReadAdvance<ReadFCmp16, 0>;
def : ReadAdvance<ReadFMA16, 0>;
+def : ReadAdvance<ReadFMA16Addend, 0>;
def : ReadAdvance<ReadFMinMax16, 0>;
def : ReadAdvance<ReadFMul16, 0>;
def : ReadAdvance<ReadFSGNJ16, 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/llvm/lib/Target/RISCV/RISCVSystemOperands.td
index d85b4a9..4c86103 100644
--- a/llvm/lib/Target/RISCV/RISCVSystemOperands.td
+++ b/llvm/lib/Target/RISCV/RISCVSystemOperands.td
@@ -19,12 +19,6 @@ include "llvm/TableGen/SearchableTable.td"
class SysReg<string name, bits<12> op> {
string Name = name;
- // A maximum of one alias is supported right now.
- string AltName = name;
- // A maximum of one deprecated name is supported right now. Unlike the
- // `AltName` alias, a `DeprecatedName` generates a diagnostic when the name is
- // used to encourage software to migrate away from the name.
- string DeprecatedName = "";
bits<12> Encoding = op;
// FIXME: add these additional fields when needed.
// Privilege Access: Read and Write = 0, 1, 2; Read-Only = 3.
@@ -37,14 +31,16 @@ class SysReg<string name, bits<12> op> {
// bits<6> Number = op{5 - 0};
code FeaturesRequired = [{ {} }];
bit isRV32Only = 0;
+ bit isAltName = 0;
+ bit isDeprecatedName = 0;
}
def SysRegsList : GenericTable {
let FilterClass = "SysReg";
// FIXME: add "ReadWrite", "Mode", "Extra", "Number" fields when needed.
let Fields = [
- "Name", "AltName", "DeprecatedName", "Encoding", "FeaturesRequired",
- "isRV32Only",
+ "Name", "Encoding", "FeaturesRequired",
+ "isRV32Only", "isAltName", "isDeprecatedName"
];
let PrimaryKey = [ "Encoding" ];
@@ -52,19 +48,15 @@ def SysRegsList : GenericTable {
let PrimaryKeyReturnRange = true;
}
-def lookupSysRegByName : SearchIndex {
- let Table = SysRegsList;
- let Key = [ "Name" ];
-}
-
-def lookupSysRegByAltName : SearchIndex {
- let Table = SysRegsList;
- let Key = [ "AltName" ];
+def SysRegEncodings : GenericEnum {
+ let FilterClass = "SysReg";
+ let NameField = "Name";
+ let ValueField = "Encoding";
}
-def lookupSysRegByDeprecatedName : SearchIndex {
+def lookupSysRegByName : SearchIndex {
let Table = SysRegsList;
- let Key = [ "DeprecatedName" ];
+ let Key = [ "Name" ];
}
// The following CSR encodings match those given in Tables 2.2,
@@ -123,15 +115,17 @@ def : SysReg<"senvcfg", 0x10A>;
def : SysReg<"sscratch", 0x140>;
def : SysReg<"sepc", 0x141>;
def : SysReg<"scause", 0x142>;
-let DeprecatedName = "sbadaddr" in
def : SysReg<"stval", 0x143>;
+let isDeprecatedName = 1 in
+def : SysReg<"sbadaddr", 0x143>;
def : SysReg<"sip", 0x144>;
//===----------------------------------------------------------------------===//
// Supervisor Protection and Translation
//===----------------------------------------------------------------------===//
-let DeprecatedName = "sptbr" in
def : SysReg<"satp", 0x180>;
+let isDeprecatedName = 1 in
+def : SysReg<"sptbr", 0x180>;
//===----------------------------------------------------------------------===//
// Quality-of-Service (QoS) Identifiers (Ssqosid)
@@ -245,8 +239,9 @@ def : SysReg<"mstatush", 0x310>;
def : SysReg<"mscratch", 0x340>;
def : SysReg<"mepc", 0x341>;
def : SysReg<"mcause", 0x342>;
-let DeprecatedName = "mbadaddr" in
def : SysReg<"mtval", 0x343>;
+let isDeprecatedName = 1 in
+def : SysReg<"mbadaddr", 0x343>;
def : SysReg<"mip", 0x344>;
def : SysReg<"mtinst", 0x34A>;
def : SysReg<"mtval2", 0x34B>;
@@ -298,8 +293,9 @@ foreach i = 3...31 in
//===----------------------------------------------------------------------===//
// Machine Counter Setup
//===----------------------------------------------------------------------===//
-let AltName = "mucounteren" in // Privileged spec v1.9.1 Name
def : SysReg<"mcountinhibit", 0x320>;
+let isAltName = 1 in
+def : SysReg<"mucounteren", 0x320>;
// mhpmevent3-mhpmevent31 at 0x323-0x33F.
foreach i = 3...31 in
@@ -323,7 +319,10 @@ def : SysReg<"tselect", 0x7A0>;
def : SysReg<"tdata1", 0x7A1>;
def : SysReg<"tdata2", 0x7A2>;
def : SysReg<"tdata3", 0x7A3>;
+def : SysReg<"tinfo", 0x7A4>;
+def : SysReg<"tcontrol", 0x7A5>;
def : SysReg<"mcontext", 0x7A8>;
+def : SysReg<"mscontext", 0x7AA>;
//===----------------------------------------------------------------------===//
// Debug Mode Registers
@@ -333,8 +332,9 @@ def : SysReg<"dpc", 0x7B1>;
// "dscratch" is an alternative name for "dscratch0" which appeared in earlier
// drafts of the RISC-V debug spec
-let AltName = "dscratch" in
def : SysReg<"dscratch0", 0x7B2>;
+let isAltName = 1 in
+def : SysReg<"dscratch", 0x7B2>;
def : SysReg<"dscratch1", 0x7B3>;
//===----------------------------------------------------------------------===//
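
The reworked SysReg table replaces the one-alias-per-register AltName/DeprecatedName string fields with separate rows flagged isAltName/isDeprecatedName that share an Encoding, and PrimaryKeyReturnRange makes the encoding lookup return every row for a CSR. A minimal C++ sketch of how a consumer might pick the canonical name from such a range; the struct and helper below are illustrative stand-ins, not the TableGen'erated code:

#include <optional>
#include <string>
#include <vector>

// Illustrative stand-in for the TableGen'erated row type and range lookup.
struct SysReg {
  std::string Name;
  unsigned Encoding;
  bool isRV32Only;
  bool isAltName;        // e.g. "mucounteren" sharing 0x320 with mcountinhibit
  bool isDeprecatedName; // e.g. "mbadaddr" sharing 0x343 with mtval
};

std::optional<std::string> canonicalName(const std::vector<SysReg> &Range) {
  // Prefer the row that is neither an alias nor a deprecated spelling.
  for (const SysReg &R : Range)
    if (!R.isAltName && !R.isDeprecatedName)
      return R.Name; // for 0x343 this yields "mtval", never "mbadaddr"
  return std::nullopt;
}

Name-based lookups stay a single lookupSysRegByName index, since each spelling is now its own row.
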
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 49192bd..850d624 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1663,7 +1663,7 @@ InstructionCost RISCVTTIImpl::getStoreImmCost(Type *Ty,
return 0;
if (OpInfo.isUniform())
- // vmv.x.i, vmv.v.x, or vfmv.v.f
+ // vmv.v.i, vmv.v.x, or vfmv.v.f
// We ignore the cost of the scalar constant materialization to be consistent
// with how we treat scalar constants themselves just above.
return 1;
@@ -2329,6 +2329,15 @@ unsigned RISCVTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);
}
+TTI::AddressingModeKind
+RISCVTTIImpl::getPreferredAddressingMode(const Loop *L,
+ ScalarEvolution *SE) const {
+ if (ST->hasVendorXCVmem() && !ST->is64Bit())
+ return TTI::AMK_PostIndexed;
+
+ return BasicTTIImplBase::getPreferredAddressingMode(L, SE);
+}
+
bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
const TargetTransformInfo::LSRCost &C2) {
// RISC-V specific here are "instruction number 1st priority".
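
The new getPreferredAddressingMode override reports AMK_PostIndexed when the CORE-V XCVmem vendor extension is present on RV32, steering loop strength reduction toward accesses that fold the pointer bump into the memory operation. A rough illustration of the loop shape this favors; the post-increment mnemonic is cited from the XCVmem extension as an assumption, and the C++ function is only a model of the pattern, not code from this patch:

// Post-indexed shape: each iteration loads through the pointer and then
// advances it, which XCVmem can fold into one post-increment access
// (assumed vendor syntax: cv.lw rd, (rs1), 4) instead of a load plus addi.
int sumWords(const int *P, int N) {
  int S = 0;
  for (int I = 0; I < N; ++I)
    S += *P++;
  return S;
}
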
@@ -2549,16 +2558,21 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
TTI::MemCmpExpansionOptions Options;
// TODO: Enable expansion when unaligned access is not supported after we fix
// issues in ExpandMemcmp.
- if (!(ST->enableUnalignedScalarMem() &&
- (ST->hasStdExtZbb() || ST->hasStdExtZbkb() || IsZeroCmp)))
+ if (!ST->enableUnalignedScalarMem())
+ return Options;
+
+ if (!ST->hasStdExtZbb() && !ST->hasStdExtZbkb() && !IsZeroCmp)
return Options;
Options.AllowOverlappingLoads = true;
Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
Options.NumLoadsPerBlock = Options.MaxNumLoads;
- if (ST->is64Bit())
+ if (ST->is64Bit()) {
Options.LoadSizes = {8, 4, 2, 1};
- else
+ Options.AllowedTailExpansions = {3, 5, 6};
+ } else {
Options.LoadSizes = {4, 2, 1};
+ Options.AllowedTailExpansions = {3};
+ }
return Options;
}
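
For intuition: with LoadSizes = {8, 4, 2, 1} and 3/5/6-byte tails allowed on RV64, an 11-byte memcmp can expand to an 8-byte block plus a 3-byte tail done as a 2-byte and a 1-byte load. The hand-written C++ analogue of the equality case below is an illustrative shape, not the pass's literal IR output, and it assumes fast unaligned scalar access per the enableUnalignedScalarMem() gate above:

#include <cstdint>
#include <cstring>

// Hand-written analogue of an expanded 11-byte equality compare on RV64:
// one 8-byte block, then the allowed 3-byte tail handled as 2 + 1.
bool equal11(const uint8_t *A, const uint8_t *B) {
  uint64_t A8, B8;
  std::memcpy(&A8, A, 8);
  std::memcpy(&B8, B, 8);
  uint16_t A2, B2;
  std::memcpy(&A2, A + 8, 2);
  std::memcpy(&B2, B + 8, 2);
  uint8_t A1 = A[10], B1 = B[10]; // 2 + 1 covers the 3-byte tail
  return ((A8 ^ B8) | uint64_t(A2 ^ B2) | uint64_t(A1 ^ B1)) == 0;
}
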
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index bd90bfe..9b36439 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -388,6 +388,9 @@ public:
llvm_unreachable("unknown register class");
}
+ TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L,
+ ScalarEvolution *SE) const;
+
unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
if (Vector)
return RISCVRegisterClass::VRRC;
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 4e3212c..ad61a77 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -50,7 +50,10 @@ public:
StringRef getPassName() const override { return PASS_NAME; }
private:
- bool checkUsers(const MachineOperand *&CommonVL, MachineInstr &MI);
+ std::optional<MachineOperand> getMinimumVLForUser(MachineOperand &UserOp);
+ /// Returns the largest common VL MachineOperand that may be used to optimize
+  /// MI. Returns std::nullopt if no suitable VL can be found.
+ std::optional<MachineOperand> checkUsers(MachineInstr &MI);
bool tryReduceVL(MachineInstr &MI);
bool isCandidate(const MachineInstr &MI) const;
};
@@ -76,11 +79,6 @@ static bool isVectorRegClass(Register R, const MachineRegisterInfo *MRI) {
/// Represents the EMUL and EEW of a MachineOperand.
struct OperandInfo {
- enum class State {
- Unknown,
- Known,
- } S;
-
// Represent as 1,2,4,8, ... and fractional indicator. This is because
// EMUL can take on values that don't map to RISCVII::VLMUL values exactly.
// For example, a mask operand can have an EMUL less than MF8.
@@ -89,34 +87,32 @@ struct OperandInfo {
unsigned Log2EEW;
OperandInfo(RISCVII::VLMUL EMUL, unsigned Log2EEW)
- : S(State::Known), EMUL(RISCVVType::decodeVLMUL(EMUL)), Log2EEW(Log2EEW) {
- }
+ : EMUL(RISCVVType::decodeVLMUL(EMUL)), Log2EEW(Log2EEW) {}
OperandInfo(std::pair<unsigned, bool> EMUL, unsigned Log2EEW)
- : S(State::Known), EMUL(EMUL), Log2EEW(Log2EEW) {}
+ : EMUL(EMUL), Log2EEW(Log2EEW) {}
- OperandInfo() : S(State::Unknown) {}
+ OperandInfo(unsigned Log2EEW) : Log2EEW(Log2EEW) {}
- bool isUnknown() const { return S == State::Unknown; }
- bool isKnown() const { return S == State::Known; }
+ OperandInfo() = delete;
static bool EMULAndEEWAreEqual(const OperandInfo &A, const OperandInfo &B) {
- assert(A.isKnown() && B.isKnown() && "Both operands must be known");
-
return A.Log2EEW == B.Log2EEW && A.EMUL->first == B.EMUL->first &&
A.EMUL->second == B.EMUL->second;
}
+ static bool EEWAreEqual(const OperandInfo &A, const OperandInfo &B) {
+ return A.Log2EEW == B.Log2EEW;
+ }
+
void print(raw_ostream &OS) const {
- if (isUnknown()) {
- OS << "Unknown";
- return;
- }
- assert(EMUL && "Expected EMUL to have value");
- OS << "EMUL: m";
- if (EMUL->second)
- OS << "f";
- OS << EMUL->first;
+ if (EMUL) {
+ OS << "EMUL: m";
+ if (EMUL->second)
+ OS << "f";
+ OS << EMUL->first;
+ } else
+ OS << "EMUL: unknown\n";
OS << ", EEW: " << (1 << Log2EEW);
}
};
@@ -127,30 +123,18 @@ static raw_ostream &operator<<(raw_ostream &OS, const OperandInfo &OI) {
return OS;
}
-namespace llvm {
-namespace RISCVVType {
-/// Return the RISCVII::VLMUL that is two times VLMul.
-/// Precondition: VLMul is not LMUL_RESERVED or LMUL_8.
-static RISCVII::VLMUL twoTimesVLMUL(RISCVII::VLMUL VLMul) {
- switch (VLMul) {
- case RISCVII::VLMUL::LMUL_F8:
- return RISCVII::VLMUL::LMUL_F4;
- case RISCVII::VLMUL::LMUL_F4:
- return RISCVII::VLMUL::LMUL_F2;
- case RISCVII::VLMUL::LMUL_F2:
- return RISCVII::VLMUL::LMUL_1;
- case RISCVII::VLMUL::LMUL_1:
- return RISCVII::VLMUL::LMUL_2;
- case RISCVII::VLMUL::LMUL_2:
- return RISCVII::VLMUL::LMUL_4;
- case RISCVII::VLMUL::LMUL_4:
- return RISCVII::VLMUL::LMUL_8;
- case RISCVII::VLMUL::LMUL_8:
- default:
- llvm_unreachable("Could not multiply VLMul by 2");
- }
+LLVM_ATTRIBUTE_UNUSED
+static raw_ostream &operator<<(raw_ostream &OS,
+ const std::optional<OperandInfo> &OI) {
+ if (OI)
+ OI->print(OS);
+ else
+ OS << "nullopt";
+ return OS;
}
+namespace llvm {
+namespace RISCVVType {
/// Return EMUL = (EEW / SEW) * LMUL where EEW comes from Log2EEW and LMUL and
/// SEW are from the TSFlags of MI.
static std::pair<unsigned, bool>
@@ -180,24 +164,22 @@ getEMULEqualsEEWDivSEWTimesLMUL(unsigned Log2EEW, const MachineInstr &MI) {
} // end namespace RISCVVType
} // end namespace llvm
-/// Dest has EEW=SEW and EMUL=LMUL. Source EEW=SEW/Factor (i.e. F2 => EEW/2).
-/// Source has EMUL=(EEW/SEW)*LMUL. LMUL and SEW comes from TSFlags of MI.
-static OperandInfo getIntegerExtensionOperandInfo(unsigned Factor,
- const MachineInstr &MI,
- const MachineOperand &MO) {
- RISCVII::VLMUL MIVLMul = RISCVII::getLMul(MI.getDesc().TSFlags);
+/// Dest has EEW=SEW. Source EEW=SEW/Factor (i.e. F2 => EEW/2).
+/// SEW comes from TSFlags of MI.
+static unsigned getIntegerExtensionOperandEEW(unsigned Factor,
+ const MachineInstr &MI,
+ const MachineOperand &MO) {
unsigned MILog2SEW =
MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
if (MO.getOperandNo() == 0)
- return OperandInfo(MIVLMul, MILog2SEW);
+ return MILog2SEW;
unsigned MISEW = 1 << MILog2SEW;
unsigned EEW = MISEW / Factor;
unsigned Log2EEW = Log2_32(EEW);
- return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(Log2EEW, MI),
- Log2EEW);
+ return Log2EEW;
}
/// Check whether MO is a mask operand of MI.
@@ -211,18 +193,15 @@ static bool isMaskOperand(const MachineInstr &MI, const MachineOperand &MO,
return Desc.operands()[MO.getOperandNo()].RegClass == RISCV::VMV0RegClassID;
}
-/// Return the OperandInfo for MO.
-static OperandInfo getOperandInfo(const MachineOperand &MO,
- const MachineRegisterInfo *MRI) {
+static std::optional<unsigned>
+getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
const MachineInstr &MI = *MO.getParent();
const RISCVVPseudosTable::PseudoInfo *RVV =
RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
assert(RVV && "Could not find MI in PseudoTable");
- // MI has a VLMUL and SEW associated with it. The RVV specification defines
- // the LMUL and SEW of each operand and definition in relation to MI.VLMUL and
- // MI.SEW.
- RISCVII::VLMUL MIVLMul = RISCVII::getLMul(MI.getDesc().TSFlags);
+ // MI has a SEW associated with it. The RVV specification defines
+ // the EEW of each operand and definition in relation to MI.SEW.
unsigned MILog2SEW =
MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
@@ -233,13 +212,13 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
// since they must preserve the entire register content.
if (HasPassthru && MO.getOperandNo() == MI.getNumExplicitDefs() &&
(MO.getReg() != RISCV::NoRegister))
- return {};
+ return std::nullopt;
bool IsMODef = MO.getOperandNo() == 0;
- // All mask operands have EEW=1, EMUL=(EEW/SEW)*LMUL
+ // All mask operands have EEW=1
if (isMaskOperand(MI, MO, MRI))
- return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
+ return 0;
// switch against BaseInstr to reduce number of cases that need to be
// considered.
@@ -256,55 +235,65 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
// Vector Loads and Stores
// Vector Unit-Stride Instructions
// Vector Strided Instructions
- /// Dest EEW encoded in the instruction and EMUL=(EEW/SEW)*LMUL
+ /// Dest EEW encoded in the instruction
+ case RISCV::VLM_V:
+ case RISCV::VSM_V:
+ return 0;
+ case RISCV::VLE8_V:
case RISCV::VSE8_V:
+ case RISCV::VLSE8_V:
case RISCV::VSSE8_V:
- return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(3, MI), 3);
+ return 3;
+ case RISCV::VLE16_V:
case RISCV::VSE16_V:
+ case RISCV::VLSE16_V:
case RISCV::VSSE16_V:
- return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4);
+ return 4;
+ case RISCV::VLE32_V:
case RISCV::VSE32_V:
+ case RISCV::VLSE32_V:
case RISCV::VSSE32_V:
- return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(5, MI), 5);
+ return 5;
+ case RISCV::VLE64_V:
case RISCV::VSE64_V:
+ case RISCV::VLSE64_V:
case RISCV::VSSE64_V:
- return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(6, MI), 6);
+ return 6;
// Vector Indexed Instructions
// vs(o|u)xei<eew>.v
- // Dest/Data (operand 0) EEW=SEW, EMUL=LMUL. Source EEW=<eew> and
- // EMUL=(EEW/SEW)*LMUL.
+ // Dest/Data (operand 0) EEW=SEW. Source EEW=<eew>.
case RISCV::VLUXEI8_V:
case RISCV::VLOXEI8_V:
case RISCV::VSUXEI8_V:
case RISCV::VSOXEI8_V: {
if (MO.getOperandNo() == 0)
- return OperandInfo(MIVLMul, MILog2SEW);
- return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(3, MI), 3);
+ return MILog2SEW;
+ return 3;
}
case RISCV::VLUXEI16_V:
case RISCV::VLOXEI16_V:
case RISCV::VSUXEI16_V:
case RISCV::VSOXEI16_V: {
if (MO.getOperandNo() == 0)
- return OperandInfo(MIVLMul, MILog2SEW);
- return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4);
+ return MILog2SEW;
+ return 4;
}
case RISCV::VLUXEI32_V:
case RISCV::VLOXEI32_V:
case RISCV::VSUXEI32_V:
case RISCV::VSOXEI32_V: {
if (MO.getOperandNo() == 0)
- return OperandInfo(MIVLMul, MILog2SEW);
- return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(5, MI), 5);
+ return MILog2SEW;
+ return 5;
}
case RISCV::VLUXEI64_V:
case RISCV::VLOXEI64_V:
case RISCV::VSUXEI64_V:
case RISCV::VSOXEI64_V: {
if (MO.getOperandNo() == 0)
- return OperandInfo(MIVLMul, MILog2SEW);
- return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(6, MI), 6);
+ return MILog2SEW;
+ return 6;
}
// Vector Integer Arithmetic Instructions
@@ -318,7 +307,7 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VRSUB_VX:
// Vector Bitwise Logical Instructions
// Vector Single-Width Shift Instructions
- // EEW=SEW. EMUL=LMUL.
+ // EEW=SEW.
case RISCV::VAND_VI:
case RISCV::VAND_VV:
case RISCV::VAND_VX:
@@ -338,7 +327,7 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VSRA_VV:
case RISCV::VSRA_VX:
// Vector Integer Min/Max Instructions
- // EEW=SEW. EMUL=LMUL.
+ // EEW=SEW.
case RISCV::VMINU_VV:
case RISCV::VMINU_VX:
case RISCV::VMIN_VV:
@@ -348,7 +337,7 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VMAX_VV:
case RISCV::VMAX_VX:
// Vector Single-Width Integer Multiply Instructions
- // Source and Dest EEW=SEW and EMUL=LMUL.
+ // Source and Dest EEW=SEW.
case RISCV::VMUL_VV:
case RISCV::VMUL_VX:
case RISCV::VMULH_VV:
@@ -358,7 +347,7 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VMULHSU_VV:
case RISCV::VMULHSU_VX:
// Vector Integer Divide Instructions
- // EEW=SEW. EMUL=LMUL.
+ // EEW=SEW.
case RISCV::VDIVU_VV:
case RISCV::VDIVU_VX:
case RISCV::VDIV_VV:
@@ -368,7 +357,7 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VREM_VV:
case RISCV::VREM_VX:
// Vector Single-Width Integer Multiply-Add Instructions
- // EEW=SEW. EMUL=LMUL.
+ // EEW=SEW.
case RISCV::VMACC_VV:
case RISCV::VMACC_VX:
case RISCV::VNMSAC_VV:
@@ -379,8 +368,8 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VNMSUB_VX:
// Vector Integer Merge Instructions
// Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
- // EEW=SEW and EMUL=LMUL, except the mask operand has EEW=1 and EMUL=
- // (EEW/SEW)*LMUL. Mask operand is handled before this switch.
+ // EEW=SEW, except the mask operand has EEW=1. Mask operand is handled
+ // before this switch.
case RISCV::VMERGE_VIM:
case RISCV::VMERGE_VVM:
case RISCV::VMERGE_VXM:
@@ -393,7 +382,7 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
// Vector Fixed-Point Arithmetic Instructions
// Vector Single-Width Saturating Add and Subtract
// Vector Single-Width Averaging Add and Subtract
- // EEW=SEW. EMUL=LMUL.
+ // EEW=SEW.
case RISCV::VMV_V_I:
case RISCV::VMV_V_V:
case RISCV::VMV_V_X:
@@ -415,8 +404,13 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VASUBU_VX:
case RISCV::VASUB_VV:
case RISCV::VASUB_VX:
+ // Vector Single-Width Fractional Multiply with Rounding and Saturation
+ // EEW=SEW. The instruction produces 2*SEW product internally but
+ // saturates to fit into SEW bits.
+ case RISCV::VSMUL_VV:
+ case RISCV::VSMUL_VX:
// Vector Single-Width Scaling Shift Instructions
- // EEW=SEW. EMUL=LMUL.
+ // EEW=SEW.
case RISCV::VSSRL_VI:
case RISCV::VSSRL_VV:
case RISCV::VSSRL_VX:
@@ -426,13 +420,13 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
// Vector Permutation Instructions
// Integer Scalar Move Instructions
// Floating-Point Scalar Move Instructions
- // EMUL=LMUL. EEW=SEW.
+ // EEW=SEW.
case RISCV::VMV_X_S:
case RISCV::VMV_S_X:
case RISCV::VFMV_F_S:
case RISCV::VFMV_S_F:
// Vector Slide Instructions
- // EMUL=LMUL. EEW=SEW.
+ // EEW=SEW.
case RISCV::VSLIDEUP_VI:
case RISCV::VSLIDEUP_VX:
case RISCV::VSLIDEDOWN_VI:
@@ -442,19 +436,62 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VSLIDE1DOWN_VX:
case RISCV::VFSLIDE1DOWN_VF:
// Vector Register Gather Instructions
- // EMUL=LMUL. EEW=SEW. For mask operand, EMUL=1 and EEW=1.
+ // EEW=SEW. For mask operand, EEW=1.
case RISCV::VRGATHER_VI:
case RISCV::VRGATHER_VV:
case RISCV::VRGATHER_VX:
// Vector Compress Instruction
- // EMUL=LMUL. EEW=SEW.
+ // EEW=SEW.
case RISCV::VCOMPRESS_VM:
// Vector Element Index Instruction
case RISCV::VID_V:
- return OperandInfo(MIVLMul, MILog2SEW);
+ // Vector Single-Width Floating-Point Add/Subtract Instructions
+ case RISCV::VFADD_VF:
+ case RISCV::VFADD_VV:
+ case RISCV::VFSUB_VF:
+ case RISCV::VFSUB_VV:
+ case RISCV::VFRSUB_VF:
+ // Vector Single-Width Floating-Point Multiply/Divide Instructions
+ case RISCV::VFMUL_VF:
+ case RISCV::VFMUL_VV:
+ case RISCV::VFDIV_VF:
+ case RISCV::VFDIV_VV:
+ case RISCV::VFRDIV_VF:
+ // Vector Floating-Point Square-Root Instruction
+ case RISCV::VFSQRT_V:
+ // Vector Floating-Point Reciprocal Square-Root Estimate Instruction
+ case RISCV::VFRSQRT7_V:
+ // Vector Floating-Point Reciprocal Estimate Instruction
+ case RISCV::VFREC7_V:
+ // Vector Floating-Point MIN/MAX Instructions
+ case RISCV::VFMIN_VF:
+ case RISCV::VFMIN_VV:
+ case RISCV::VFMAX_VF:
+ case RISCV::VFMAX_VV:
+ // Vector Floating-Point Sign-Injection Instructions
+ case RISCV::VFSGNJ_VF:
+ case RISCV::VFSGNJ_VV:
+ case RISCV::VFSGNJN_VV:
+ case RISCV::VFSGNJN_VF:
+ case RISCV::VFSGNJX_VF:
+ case RISCV::VFSGNJX_VV:
+ // Vector Floating-Point Classify Instruction
+ case RISCV::VFCLASS_V:
+ // Vector Floating-Point Move Instruction
+ case RISCV::VFMV_V_F:
+ // Single-Width Floating-Point/Integer Type-Convert Instructions
+ case RISCV::VFCVT_XU_F_V:
+ case RISCV::VFCVT_X_F_V:
+ case RISCV::VFCVT_RTZ_XU_F_V:
+ case RISCV::VFCVT_RTZ_X_F_V:
+ case RISCV::VFCVT_F_XU_V:
+ case RISCV::VFCVT_F_X_V:
+ // Vector Floating-Point Merge Instruction
+ case RISCV::VFMERGE_VFM:
+ return MILog2SEW;
// Vector Widening Integer Add/Subtract
- // Def uses EEW=2*SEW and EMUL=2*LMUL. Operands use EEW=SEW and EMUL=LMUL.
+  // Def uses EEW=2*SEW. Operands use EEW=SEW.
case RISCV::VWADDU_VV:
case RISCV::VWADDU_VX:
case RISCV::VWSUBU_VV:
@@ -465,7 +502,7 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VWSUB_VX:
case RISCV::VWSLL_VI:
// Vector Widening Integer Multiply Instructions
- // Source and Destination EMUL=LMUL. Destination EEW=2*SEW. Source EEW=SEW.
+ // Destination EEW=2*SEW. Source EEW=SEW.
case RISCV::VWMUL_VV:
case RISCV::VWMUL_VX:
case RISCV::VWMULSU_VV:
@@ -473,7 +510,7 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VWMULU_VV:
case RISCV::VWMULU_VX:
// Vector Widening Integer Multiply-Add Instructions
- // Destination EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL.
+ // Destination EEW=2*SEW. Source EEW=SEW.
// A SEW-bit*SEW-bit multiply of the sources forms a 2*SEW-bit value, which
// is then added to the 2*SEW-bit Dest. These instructions never have a
// passthru operand.
@@ -483,14 +520,38 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VWMACC_VX:
case RISCV::VWMACCSU_VV:
case RISCV::VWMACCSU_VX:
- case RISCV::VWMACCUS_VX: {
+ case RISCV::VWMACCUS_VX:
+ // Vector Widening Floating-Point Fused Multiply-Add Instructions
+ case RISCV::VFWMACC_VF:
+ case RISCV::VFWMACC_VV:
+ case RISCV::VFWNMACC_VF:
+ case RISCV::VFWNMACC_VV:
+ case RISCV::VFWMSAC_VF:
+ case RISCV::VFWMSAC_VV:
+ case RISCV::VFWNMSAC_VF:
+ case RISCV::VFWNMSAC_VV:
+ // Vector Widening Floating-Point Add/Subtract Instructions
+ // Dest EEW=2*SEW. Source EEW=SEW.
+ case RISCV::VFWADD_VV:
+ case RISCV::VFWADD_VF:
+ case RISCV::VFWSUB_VV:
+ case RISCV::VFWSUB_VF:
+ // Vector Widening Floating-Point Multiply
+ case RISCV::VFWMUL_VF:
+ case RISCV::VFWMUL_VV:
+ // Widening Floating-Point/Integer Type-Convert Instructions
+ case RISCV::VFWCVT_XU_F_V:
+ case RISCV::VFWCVT_X_F_V:
+ case RISCV::VFWCVT_RTZ_XU_F_V:
+ case RISCV::VFWCVT_RTZ_X_F_V:
+ case RISCV::VFWCVT_F_XU_V:
+ case RISCV::VFWCVT_F_X_V:
+ case RISCV::VFWCVT_F_F_V: {
unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
- RISCVII::VLMUL EMUL =
- IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
- return OperandInfo(EMUL, Log2EEW);
+ return Log2EEW;
}
- // Def and Op1 uses EEW=2*SEW and EMUL=2*LMUL. Op2 uses EEW=SEW and EMUL=LMUL
+  // Def and Op1 use EEW=2*SEW. Op2 uses EEW=SEW.
case RISCV::VWADDU_WV:
case RISCV::VWADDU_WX:
case RISCV::VWSUBU_WV:
@@ -498,29 +559,31 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VWADD_WV:
case RISCV::VWADD_WX:
case RISCV::VWSUB_WV:
- case RISCV::VWSUB_WX: {
+ case RISCV::VWSUB_WX:
+ // Vector Widening Floating-Point Add/Subtract Instructions
+ case RISCV::VFWADD_WF:
+ case RISCV::VFWADD_WV:
+ case RISCV::VFWSUB_WF:
+ case RISCV::VFWSUB_WV: {
bool IsOp1 = HasPassthru ? MO.getOperandNo() == 2 : MO.getOperandNo() == 1;
bool TwoTimes = IsMODef || IsOp1;
unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
- RISCVII::VLMUL EMUL =
- TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
- return OperandInfo(EMUL, Log2EEW);
+ return Log2EEW;
}
// Vector Integer Extension
case RISCV::VZEXT_VF2:
case RISCV::VSEXT_VF2:
- return getIntegerExtensionOperandInfo(2, MI, MO);
+ return getIntegerExtensionOperandEEW(2, MI, MO);
case RISCV::VZEXT_VF4:
case RISCV::VSEXT_VF4:
- return getIntegerExtensionOperandInfo(4, MI, MO);
+ return getIntegerExtensionOperandEEW(4, MI, MO);
case RISCV::VZEXT_VF8:
case RISCV::VSEXT_VF8:
- return getIntegerExtensionOperandInfo(8, MI, MO);
+ return getIntegerExtensionOperandEEW(8, MI, MO);
// Vector Narrowing Integer Right Shift Instructions
- // Destination EEW=SEW and EMUL=LMUL, Op 1 has EEW=2*SEW EMUL=2*LMUL. Op2 has
- // EEW=SEW EMUL=LMUL.
+  // Destination EEW=SEW, Op1 has EEW=2*SEW. Op2 has EEW=SEW.
case RISCV::VNSRL_WX:
case RISCV::VNSRL_WI:
case RISCV::VNSRL_WV:
@@ -528,19 +591,26 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VNSRA_WV:
case RISCV::VNSRA_WX:
// Vector Narrowing Fixed-Point Clip Instructions
- // Destination and Op1 EEW=SEW and EMUL=LMUL. Op2 EEW=2*SEW and EMUL=2*LMUL
+ // Destination and Op1 EEW=SEW. Op2 EEW=2*SEW.
case RISCV::VNCLIPU_WI:
case RISCV::VNCLIPU_WV:
case RISCV::VNCLIPU_WX:
case RISCV::VNCLIP_WI:
case RISCV::VNCLIP_WV:
- case RISCV::VNCLIP_WX: {
+ case RISCV::VNCLIP_WX:
+ // Narrowing Floating-Point/Integer Type-Convert Instructions
+ case RISCV::VFNCVT_XU_F_W:
+ case RISCV::VFNCVT_X_F_W:
+ case RISCV::VFNCVT_RTZ_XU_F_W:
+ case RISCV::VFNCVT_RTZ_X_F_W:
+ case RISCV::VFNCVT_F_XU_W:
+ case RISCV::VFNCVT_F_X_W:
+ case RISCV::VFNCVT_F_F_W:
+ case RISCV::VFNCVT_ROD_F_F_W: {
bool IsOp1 = HasPassthru ? MO.getOperandNo() == 2 : MO.getOperandNo() == 1;
bool TwoTimes = IsOp1;
unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
- RISCVII::VLMUL EMUL =
- TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
- return OperandInfo(EMUL, Log2EEW);
+ return Log2EEW;
}
// Vector Mask Instructions
@@ -548,7 +618,7 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
// vmsbf.m set-before-first mask bit
// vmsif.m set-including-first mask bit
// vmsof.m set-only-first mask bit
- // EEW=1 and EMUL=(EEW/SEW)*LMUL
+ // EEW=1
// We handle the cases when operand is a v0 mask operand above the switch,
// but these instructions may use non-v0 mask operands and need to be handled
// specifically.
@@ -563,20 +633,20 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VMSBF_M:
case RISCV::VMSIF_M:
case RISCV::VMSOF_M: {
- return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
+ return 0;
}
// Vector Iota Instruction
- // EEW=SEW and EMUL=LMUL, except the mask operand has EEW=1 and EMUL=
- // (EEW/SEW)*LMUL. Mask operand is not handled before this switch.
+ // EEW=SEW, except the mask operand has EEW=1. Mask operand is not handled
+ // before this switch.
case RISCV::VIOTA_M: {
if (IsMODef || MO.getOperandNo() == 1)
- return OperandInfo(MIVLMul, MILog2SEW);
- return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
+ return MILog2SEW;
+ return 0;
}
// Vector Integer Compare Instructions
- // Dest EEW=1 and EMUL=(EEW/SEW)*LMUL. Source EEW=SEW and EMUL=LMUL.
+ // Dest EEW=1. Source EEW=SEW.
case RISCV::VMSEQ_VI:
case RISCV::VMSEQ_VV:
case RISCV::VMSEQ_VX:
@@ -598,29 +668,87 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VMSGT_VI:
case RISCV::VMSGT_VX:
// Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
- // Dest EEW=1 and EMUL=(EEW/SEW)*LMUL. Source EEW=SEW and EMUL=LMUL. Mask
- // source operand handled above this switch.
+ // Dest EEW=1. Source EEW=SEW. Mask source operand handled above this switch.
case RISCV::VMADC_VIM:
case RISCV::VMADC_VVM:
case RISCV::VMADC_VXM:
case RISCV::VMSBC_VVM:
case RISCV::VMSBC_VXM:
- // Dest EEW=1 and EMUL=(EEW/SEW)*LMUL. Source EEW=SEW and EMUL=LMUL.
+ // Dest EEW=1. Source EEW=SEW.
case RISCV::VMADC_VV:
case RISCV::VMADC_VI:
case RISCV::VMADC_VX:
case RISCV::VMSBC_VV:
- case RISCV::VMSBC_VX: {
+ case RISCV::VMSBC_VX:
+ // 13.13. Vector Floating-Point Compare Instructions
+ // Dest EEW=1. Source EEW=SEW
+ case RISCV::VMFEQ_VF:
+ case RISCV::VMFEQ_VV:
+ case RISCV::VMFNE_VF:
+ case RISCV::VMFNE_VV:
+ case RISCV::VMFLT_VF:
+ case RISCV::VMFLT_VV:
+ case RISCV::VMFLE_VF:
+ case RISCV::VMFLE_VV:
+ case RISCV::VMFGT_VF:
+ case RISCV::VMFGE_VF: {
if (IsMODef)
- return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
- return OperandInfo(MIVLMul, MILog2SEW);
+ return 0;
+ return MILog2SEW;
+ }
+
+ // Vector Reduction Operations
+ // Vector Single-Width Integer Reduction Instructions
+ case RISCV::VREDAND_VS:
+ case RISCV::VREDMAX_VS:
+ case RISCV::VREDMAXU_VS:
+ case RISCV::VREDMIN_VS:
+ case RISCV::VREDMINU_VS:
+ case RISCV::VREDOR_VS:
+ case RISCV::VREDSUM_VS:
+ case RISCV::VREDXOR_VS: {
+ return MILog2SEW;
}
default:
- return {};
+ return std::nullopt;
}
}
+static std::optional<OperandInfo>
+getOperandInfo(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
+ const MachineInstr &MI = *MO.getParent();
+ const RISCVVPseudosTable::PseudoInfo *RVV =
+ RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
+ assert(RVV && "Could not find MI in PseudoTable");
+
+ std::optional<unsigned> Log2EEW = getOperandLog2EEW(MO, MRI);
+ if (!Log2EEW)
+ return std::nullopt;
+
+ switch (RVV->BaseInstr) {
+ // Vector Reduction Operations
+ // Vector Single-Width Integer Reduction Instructions
+ // The Dest and VS1 only read element 0 of the vector register. Return just
+ // the EEW for these.
+ case RISCV::VREDAND_VS:
+ case RISCV::VREDMAX_VS:
+ case RISCV::VREDMAXU_VS:
+ case RISCV::VREDMIN_VS:
+ case RISCV::VREDMINU_VS:
+ case RISCV::VREDOR_VS:
+ case RISCV::VREDSUM_VS:
+ case RISCV::VREDXOR_VS:
+ if (MO.getOperandNo() != 2)
+ return OperandInfo(*Log2EEW);
+ break;
+  }
+
+  // All others have EMUL=(EEW/SEW)*LMUL.
+ return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(*Log2EEW, MI),
+ *Log2EEW);
+}
+
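
A worked instance of the relation this split preserves, EMUL = (EEW / SEW) * LMUL: a mask operand has EEW = 1, so under SEW = 32 and LMUL = 2 its EMUL is 1/16 (mf16), a value plain RISCVII::VLMUL cannot encode, which is why OperandInfo keeps the (power-of-two, fractional) pair. The sketch below mirrors the idea of getEMULEqualsEEWDivSEWTimesLMUL in log2 arithmetic; it is a standalone model, not the function's exact code:

#include <utility>

// Minimal model of EMUL = (EEW / SEW) * LMUL over powers of two, as in the
// RVV spec. Returns {magnitude, fractional}.
static std::pair<unsigned, bool> emulFor(unsigned EEW, unsigned SEW,
                                         unsigned LMul, bool LMulFractional) {
  // Work in log2: exponent of LMUL, plus log2(EEW) - log2(SEW).
  int Exp = LMulFractional ? -__builtin_ctz(LMul) : __builtin_ctz(LMul);
  Exp += __builtin_ctz(EEW) - __builtin_ctz(SEW);
  return Exp < 0 ? std::make_pair(1u << -Exp, true)
                 : std::make_pair(1u << Exp, false);
}
// Example: emulFor(/*EEW=*/1, /*SEW=*/32, /*LMUL=*/2, false) == {16, true},
// i.e. mf16.
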
/// Return true if this optimization should consider MI for VL reduction. This
/// white-list approach simplifies this optimization for instructions that may
/// have more complex semantics with relation to how it uses VL.
@@ -632,6 +760,32 @@ static bool isSupportedInstr(const MachineInstr &MI) {
return false;
switch (RVV->BaseInstr) {
+ // Vector Unit-Stride Instructions
+ // Vector Strided Instructions
+ case RISCV::VLM_V:
+ case RISCV::VLE8_V:
+ case RISCV::VLSE8_V:
+ case RISCV::VLE16_V:
+ case RISCV::VLSE16_V:
+ case RISCV::VLE32_V:
+ case RISCV::VLSE32_V:
+ case RISCV::VLE64_V:
+ case RISCV::VLSE64_V:
+ // Vector Indexed Instructions
+ case RISCV::VLUXEI8_V:
+ case RISCV::VLOXEI8_V:
+ case RISCV::VLUXEI16_V:
+ case RISCV::VLOXEI16_V:
+ case RISCV::VLUXEI32_V:
+ case RISCV::VLOXEI32_V:
+ case RISCV::VLUXEI64_V:
+ case RISCV::VLOXEI64_V: {
+ for (const MachineMemOperand *MMO : MI.memoperands())
+ if (MMO->isVolatile())
+ return false;
+ return true;
+ }
+
// Vector Single-Width Integer Add and Subtract
case RISCV::VADD_VI:
case RISCV::VADD_VV:
@@ -801,6 +955,30 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VMSOF_M:
case RISCV::VIOTA_M:
case RISCV::VID_V:
+ // Single-Width Floating-Point/Integer Type-Convert Instructions
+ case RISCV::VFCVT_XU_F_V:
+ case RISCV::VFCVT_X_F_V:
+ case RISCV::VFCVT_RTZ_XU_F_V:
+ case RISCV::VFCVT_RTZ_X_F_V:
+ case RISCV::VFCVT_F_XU_V:
+ case RISCV::VFCVT_F_X_V:
+ // Widening Floating-Point/Integer Type-Convert Instructions
+ case RISCV::VFWCVT_XU_F_V:
+ case RISCV::VFWCVT_X_F_V:
+ case RISCV::VFWCVT_RTZ_XU_F_V:
+ case RISCV::VFWCVT_RTZ_X_F_V:
+ case RISCV::VFWCVT_F_XU_V:
+ case RISCV::VFWCVT_F_X_V:
+ case RISCV::VFWCVT_F_F_V:
+ // Narrowing Floating-Point/Integer Type-Convert Instructions
+ case RISCV::VFNCVT_XU_F_W:
+ case RISCV::VFNCVT_X_F_W:
+ case RISCV::VFNCVT_RTZ_XU_F_W:
+ case RISCV::VFNCVT_RTZ_X_F_W:
+ case RISCV::VFNCVT_F_XU_W:
+ case RISCV::VFNCVT_F_X_W:
+ case RISCV::VFNCVT_F_F_W:
+ case RISCV::VFNCVT_ROD_F_F_W:
return true;
}
@@ -835,6 +1013,9 @@ static bool isVectorOpUsedAsScalarOp(MachineOperand &MO) {
case RISCV::VFWREDOSUM_VS:
case RISCV::VFWREDUSUM_VS:
return MO.getOperandNo() == 3;
+ case RISCV::VMV_X_S:
+ case RISCV::VFMV_F_S:
+ return MO.getOperandNo() == 1;
default:
return false;
}
@@ -904,6 +1085,11 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
return false;
}
+ if (MI.mayRaiseFPException()) {
+ LLVM_DEBUG(dbgs() << "Not a candidate because may raise FP exception\n");
+ return false;
+ }
+
// Some instructions that produce vectors have semantics that make it more
// difficult to determine whether the VL can be reduced. For example, some
// instructions, such as reductions, may write lanes past VL to a scalar
@@ -925,79 +1111,103 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
return true;
}
-bool RISCVVLOptimizer::checkUsers(const MachineOperand *&CommonVL,
- MachineInstr &MI) {
+std::optional<MachineOperand>
+RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) {
+ const MachineInstr &UserMI = *UserOp.getParent();
+ const MCInstrDesc &Desc = UserMI.getDesc();
+
+ if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
+ LLVM_DEBUG(dbgs() << " Abort due to lack of VL, assume that"
+ " use VLMAX\n");
+ return std::nullopt;
+ }
+
+ // Instructions like reductions may use a vector register as a scalar
+ // register. In this case, we should treat it as only reading the first lane.
+ if (isVectorOpUsedAsScalarOp(UserOp)) {
+ [[maybe_unused]] Register R = UserOp.getReg();
+ [[maybe_unused]] const TargetRegisterClass *RC = MRI->getRegClass(R);
+ assert(RISCV::VRRegClass.hasSubClassEq(RC) &&
+ "Expect LMUL 1 register class for vector as scalar operands!");
+ LLVM_DEBUG(dbgs() << " Used this operand as a scalar operand\n");
+
+ return MachineOperand::CreateImm(1);
+ }
+
+ unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
+ const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
+ // Looking for an immediate or a register VL that isn't X0.
+ assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) &&
+ "Did not expect X0 VL");
+ return VLOp;
+}
+
+std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
// FIXME: Avoid visiting each user for each time we visit something on the
// worklist, combined with an extra visit from the outer loop. Restructure
// along lines of an instcombine style worklist which integrates the outer
// pass.
- bool CanReduceVL = true;
+ std::optional<MachineOperand> CommonVL;
for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) {
const MachineInstr &UserMI = *UserOp.getParent();
LLVM_DEBUG(dbgs() << " Checking user: " << UserMI << "\n");
-
- // Instructions like reductions may use a vector register as a scalar
- // register. In this case, we should treat it like a scalar register which
- // does not impact the decision on whether to optimize VL.
- // TODO: Treat it like a scalar register instead of bailing out.
- if (isVectorOpUsedAsScalarOp(UserOp)) {
- CanReduceVL = false;
- break;
- }
-
if (mayReadPastVL(UserMI)) {
LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n");
- CanReduceVL = false;
- break;
+ return std::nullopt;
}
// Tied operands might pass through.
if (UserOp.isTied()) {
LLVM_DEBUG(dbgs() << " Abort because user used as tied operand\n");
- CanReduceVL = false;
- break;
- }
-
- const MCInstrDesc &Desc = UserMI.getDesc();
- if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
- LLVM_DEBUG(dbgs() << " Abort due to lack of VL or SEW, assume that"
- " use VLMAX\n");
- CanReduceVL = false;
- break;
+ return std::nullopt;
}
- unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
- const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
-
- // Looking for an immediate or a register VL that isn't X0.
- assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) &&
- "Did not expect X0 VL");
+ auto VLOp = getMinimumVLForUser(UserOp);
+ if (!VLOp)
+ return std::nullopt;
// Use the largest VL among all the users. If we cannot determine this
// statically, then we cannot optimize the VL.
- if (!CommonVL || RISCV::isVLKnownLE(*CommonVL, VLOp)) {
- CommonVL = &VLOp;
+ if (!CommonVL || RISCV::isVLKnownLE(*CommonVL, *VLOp)) {
+ CommonVL = *VLOp;
LLVM_DEBUG(dbgs() << " User VL is: " << VLOp << "\n");
- } else if (!RISCV::isVLKnownLE(VLOp, *CommonVL)) {
+ } else if (!RISCV::isVLKnownLE(*VLOp, *CommonVL)) {
LLVM_DEBUG(dbgs() << " Abort because cannot determine a common VL\n");
- CanReduceVL = false;
- break;
+ return std::nullopt;
+ }
+
+ if (!RISCVII::hasSEWOp(UserMI.getDesc().TSFlags)) {
+ LLVM_DEBUG(dbgs() << " Abort due to lack of SEW operand\n");
+ return std::nullopt;
+ }
+
+ std::optional<OperandInfo> ConsumerInfo = getOperandInfo(UserOp, MRI);
+ std::optional<OperandInfo> ProducerInfo =
+ getOperandInfo(MI.getOperand(0), MRI);
+ if (!ConsumerInfo || !ProducerInfo) {
+ LLVM_DEBUG(dbgs() << " Abort due to unknown operand information.\n");
+ LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n");
+ LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n");
+ return std::nullopt;
}
- // The SEW and LMUL of destination and source registers need to match.
- OperandInfo ConsumerInfo = getOperandInfo(UserOp, MRI);
- OperandInfo ProducerInfo = getOperandInfo(MI.getOperand(0), MRI);
- if (ConsumerInfo.isUnknown() || ProducerInfo.isUnknown() ||
- !OperandInfo::EMULAndEEWAreEqual(ConsumerInfo, ProducerInfo)) {
- LLVM_DEBUG(dbgs() << " Abort due to incompatible or unknown "
- "information for EMUL or EEW.\n");
+ // If the operand is used as a scalar operand, then the EEW must be
+ // compatible. Otherwise, the EMUL *and* EEW must be compatible.
+ bool IsVectorOpUsedAsScalarOp = isVectorOpUsedAsScalarOp(UserOp);
+ if ((IsVectorOpUsedAsScalarOp &&
+ !OperandInfo::EEWAreEqual(*ConsumerInfo, *ProducerInfo)) ||
+ (!IsVectorOpUsedAsScalarOp &&
+ !OperandInfo::EMULAndEEWAreEqual(*ConsumerInfo, *ProducerInfo))) {
+ LLVM_DEBUG(
+ dbgs()
+ << " Abort due to incompatible information for EMUL or EEW.\n");
LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n");
LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n");
- CanReduceVL = false;
- break;
+ return std::nullopt;
}
}
- return CanReduceVL;
+
+ return CommonVL;
}
bool RISCVVLOptimizer::tryReduceVL(MachineInstr &OrigMI) {
@@ -1009,12 +1219,11 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &OrigMI) {
MachineInstr &MI = *Worklist.pop_back_val();
LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI << "\n");
- const MachineOperand *CommonVL = nullptr;
- bool CanReduceVL = true;
- if (isVectorRegClass(MI.getOperand(0).getReg(), MRI))
- CanReduceVL = checkUsers(CommonVL, MI);
+ if (!isVectorRegClass(MI.getOperand(0).getReg(), MRI))
+ continue;
- if (!CanReduceVL || !CommonVL)
+ auto CommonVL = checkUsers(MI);
+ if (!CommonVL)
continue;
assert((CommonVL->isImm() || CommonVL->getReg().isVirtual()) &&
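
The restructured checkUsers folds the user VLs itself: it keeps the largest statically known demand, treats a vector-as-scalar use as demanding one lane, and returns std::nullopt as soon as any user's VL is unknowable or incomparable. A toy model of that fold, with plain integers standing in for the MachineOperand VLs the real code orders via RISCV::isVLKnownLE:

#include <optional>
#include <vector>

// Toy model of the CommonVL computation (integers stand in for the
// MachineOperand VLs the pass compares).
std::optional<unsigned>
commonVL(const std::vector<std::optional<unsigned>> &UserVLs) {
  std::optional<unsigned> Common;
  for (const auto &VL : UserVLs) {
    if (!VL)
      return std::nullopt; // a user with unknowable VL forces a bail-out
    if (!Common || *Common <= *VL)
      Common = VL; // keep the largest demand seen so far
  }
  return Common; // e.g. {2, 4} -> 4; a scalar-style use contributes 1
}
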
diff --git a/llvm/lib/Target/SPIRV/CMakeLists.txt b/llvm/lib/Target/SPIRV/CMakeLists.txt
index aa83d99..a79e19f 100644
--- a/llvm/lib/Target/SPIRV/CMakeLists.txt
+++ b/llvm/lib/Target/SPIRV/CMakeLists.txt
@@ -20,7 +20,6 @@ add_llvm_target(SPIRVCodeGen
SPIRVCallLowering.cpp
SPIRVInlineAsmLowering.cpp
SPIRVCommandLine.cpp
- SPIRVDuplicatesTracker.cpp
SPIRVEmitIntrinsics.cpp
SPIRVGlobalRegistry.cpp
SPIRVInstrInfo.cpp
diff --git a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
index 4012bd7..78add92 100644
--- a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
@@ -274,7 +274,7 @@ void SPIRVAsmPrinter::emitInstruction(const MachineInstr *MI) {
}
void SPIRVAsmPrinter::outputModuleSection(SPIRV::ModuleSectionType MSType) {
- for (MachineInstr *MI : MAI->getMSInstrs(MSType))
+ for (const MachineInstr *MI : MAI->getMSInstrs(MSType))
outputInstruction(MI);
}
@@ -326,7 +326,7 @@ void SPIRVAsmPrinter::outputOpMemoryModel() {
void SPIRVAsmPrinter::outputEntryPoints() {
// Find all OpVariable IDs with required StorageClass.
DenseSet<Register> InterfaceIDs;
- for (MachineInstr *MI : MAI->GlobalVarList) {
+ for (const MachineInstr *MI : MAI->GlobalVarList) {
assert(MI->getOpcode() == SPIRV::OpVariable);
auto SC = static_cast<SPIRV::StorageClass::StorageClass>(
MI->getOperand(2).getImm());
@@ -336,14 +336,14 @@ void SPIRVAsmPrinter::outputEntryPoints() {
// declaring all global variables referenced by the entry point call tree.
if (ST->isAtLeastSPIRVVer(VersionTuple(1, 4)) ||
SC == SPIRV::StorageClass::Input || SC == SPIRV::StorageClass::Output) {
- MachineFunction *MF = MI->getMF();
+ const MachineFunction *MF = MI->getMF();
Register Reg = MAI->getRegisterAlias(MF, MI->getOperand(0).getReg());
InterfaceIDs.insert(Reg);
}
}
// Output OpEntryPoints adding interface args to all of them.
- for (MachineInstr *MI : MAI->getMSInstrs(SPIRV::MB_EntryPoints)) {
+ for (const MachineInstr *MI : MAI->getMSInstrs(SPIRV::MB_EntryPoints)) {
SPIRVMCInstLower MCInstLowering;
MCInst TmpInst;
MCInstLowering.lower(MI, TmpInst, MAI);
@@ -381,9 +381,8 @@ void SPIRVAsmPrinter::outputGlobalRequirements() {
void SPIRVAsmPrinter::outputExtFuncDecls() {
// Insert OpFunctionEnd after each declaration.
- SmallVectorImpl<MachineInstr *>::iterator
- I = MAI->getMSInstrs(SPIRV::MB_ExtFuncDecls).begin(),
- E = MAI->getMSInstrs(SPIRV::MB_ExtFuncDecls).end();
+ auto I = MAI->getMSInstrs(SPIRV::MB_ExtFuncDecls).begin(),
+ E = MAI->getMSInstrs(SPIRV::MB_ExtFuncDecls).end();
for (; I != E; ++I) {
outputInstruction(*I);
if ((I + 1) == E || (*(I + 1))->getOpcode() == SPIRV::OpFunction)
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
index fa37313f..44b6f5f8 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -418,6 +418,7 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
.addImm(FuncControl)
.addUse(GR->getSPIRVTypeID(FuncTy));
GR->recordFunctionDefinition(&F, &MB.getInstr()->getOperand(0));
+ GR->addGlobalObject(&F, &MIRBuilder.getMF(), FuncVReg);
// Add OpFunctionParameter instructions
int i = 0;
@@ -431,6 +432,7 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
.addUse(GR->getSPIRVTypeID(ArgTypeVRegs[i]));
if (F.isDeclaration())
GR->add(&Arg, &MIRBuilder.getMF(), ArgReg);
+ GR->addGlobalObject(&Arg, &MIRBuilder.getMF(), ArgReg);
i++;
}
// Name the function.
diff --git a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp
deleted file mode 100644
index 48df845..0000000
--- a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-//===-- SPIRVDuplicatesTracker.cpp - SPIR-V Duplicates Tracker --*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// General infrastructure for keeping track of the values that according to
-// the SPIR-V binary layout should be global to the whole module.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPIRVDuplicatesTracker.h"
-#include "SPIRVInstrInfo.h"
-
-#define DEBUG_TYPE "build-dep-graph"
-
-using namespace llvm;
-
-template <typename T>
-void SPIRVGeneralDuplicatesTracker::prebuildReg2Entry(
- SPIRVDuplicatesTracker<T> &DT, SPIRVReg2EntryTy &Reg2Entry,
- const SPIRVInstrInfo *TII) {
- for (auto &TPair : DT.getAllUses()) {
- for (auto &RegPair : TPair.second) {
- const MachineFunction *MF = RegPair.first;
- Register R = RegPair.second;
- MachineInstr *MI = MF->getRegInfo().getUniqueVRegDef(R);
- if (!MI || (TPair.second.getIsConst() && !TII->isConstantInstr(*MI)))
- continue;
- Reg2Entry[&MI->getOperand(0)] = &TPair.second;
- }
- }
-}
-
-void SPIRVGeneralDuplicatesTracker::buildDepsGraph(
- std::vector<SPIRV::DTSortableEntry *> &Graph, const SPIRVInstrInfo *TII,
- MachineModuleInfo *MMI = nullptr) {
- SPIRVReg2EntryTy Reg2Entry;
- prebuildReg2Entry(TT, Reg2Entry, TII);
- prebuildReg2Entry(CT, Reg2Entry, TII);
- prebuildReg2Entry(GT, Reg2Entry, TII);
- prebuildReg2Entry(FT, Reg2Entry, TII);
- prebuildReg2Entry(AT, Reg2Entry, TII);
- prebuildReg2Entry(MT, Reg2Entry, TII);
- prebuildReg2Entry(ST, Reg2Entry, TII);
-
- for (auto &Op2E : Reg2Entry) {
- SPIRV::DTSortableEntry *E = Op2E.second;
- Graph.push_back(E);
- for (auto &U : *E) {
- const MachineRegisterInfo &MRI = U.first->getRegInfo();
- MachineInstr *MI = MRI.getUniqueVRegDef(U.second);
- if (!MI)
- continue;
- assert(MI && MI->getParent() && "No MachineInstr created yet");
- for (auto i = MI->getNumDefs(); i < MI->getNumOperands(); i++) {
- MachineOperand &Op = MI->getOperand(i);
- if (!Op.isReg())
- continue;
- MachineInstr *VRegDef = MRI.getVRegDef(Op.getReg());
- // References to a function via function pointers generate virtual
- // registers without a definition. We are able to resolve this
-    // reference using Global Register info into an OpFunction instruction
- // but do not expect to find it in Reg2Entry.
- if (MI->getOpcode() == SPIRV::OpConstantFunctionPointerINTEL && i == 2)
- continue;
- MachineOperand *RegOp = &VRegDef->getOperand(0);
- if (Reg2Entry.count(RegOp) == 0 &&
- (MI->getOpcode() != SPIRV::OpVariable || i != 3)) {
- // try to repair the unexpected code pattern
- bool IsFixed = false;
- if (VRegDef->getOpcode() == TargetOpcode::G_CONSTANT &&
- RegOp->isReg() && MRI.getType(RegOp->getReg()).isScalar()) {
- const Constant *C = VRegDef->getOperand(1).getCImm();
- add(C, MI->getParent()->getParent(), RegOp->getReg());
- auto Iter = CT.Storage.find(C);
- if (Iter != CT.Storage.end()) {
- SPIRV::DTSortableEntry &MissedEntry = Iter->second;
- Reg2Entry[RegOp] = &MissedEntry;
- IsFixed = true;
- }
- }
- if (!IsFixed) {
- std::string DiagMsg;
- raw_string_ostream OS(DiagMsg);
- OS << "Unexpected pattern while building a dependency "
- "graph.\nInstruction: ";
- MI->print(OS);
- OS << "Operand: ";
- Op.print(OS);
- OS << "\nOperand definition: ";
- VRegDef->print(OS);
- report_fatal_error(DiagMsg.c_str());
- }
- }
- if (Reg2Entry.count(RegOp))
- E->addDep(Reg2Entry[RegOp]);
- }
-
- if (E->getIsFunc()) {
- MachineInstr *Next = MI->getNextNode();
- if (Next && (Next->getOpcode() == SPIRV::OpFunction ||
- Next->getOpcode() == SPIRV::OpFunctionParameter)) {
- E->addDep(Reg2Entry[&Next->getOperand(0)]);
- }
- }
- }
- }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- if (MMI) {
- const Module *M = MMI->getModule();
- for (auto F = M->begin(), E = M->end(); F != E; ++F) {
- const MachineFunction *MF = MMI->getMachineFunction(*F);
- if (!MF)
- continue;
- for (const MachineBasicBlock &MBB : *MF) {
- for (const MachineInstr &CMI : MBB) {
- MachineInstr &MI = const_cast<MachineInstr &>(CMI);
- MI.dump();
- if (MI.getNumExplicitDefs() > 0 &&
- Reg2Entry.count(&MI.getOperand(0))) {
- dbgs() << "\t[";
- for (SPIRV::DTSortableEntry *D :
- Reg2Entry.lookup(&MI.getOperand(0))->getDeps())
- dbgs() << Register::virtReg2Index(D->lookup(MF)) << ", ";
- dbgs() << "]\n";
- }
- }
- }
- }
- }
-#endif
-}
diff --git a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h
index 6847da0..e574892 100644
--- a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h
+++ b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h
@@ -211,23 +211,7 @@ class SPIRVGeneralDuplicatesTracker {
SPIRVDuplicatesTracker<MachineInstr> MT;
SPIRVDuplicatesTracker<SPIRV::SpecialTypeDescriptor> ST;
- // NOTE: using MOs instead of regs to get rid of MF dependency to be able
- // to use flat data structure.
- // NOTE: replacing DenseMap with MapVector doesn't affect overall correctness
- // but makes LITs more stable, should prefer DenseMap still due to
- // significant perf difference.
- using SPIRVReg2EntryTy =
- MapVector<MachineOperand *, SPIRV::DTSortableEntry *>;
-
- template <typename T>
- void prebuildReg2Entry(SPIRVDuplicatesTracker<T> &DT,
- SPIRVReg2EntryTy &Reg2Entry,
- const SPIRVInstrInfo *TII);
-
public:
- void buildDepsGraph(std::vector<SPIRV::DTSortableEntry *> &Graph,
- const SPIRVInstrInfo *TII, MachineModuleInfo *MMI);
-
void add(const Type *Ty, const MachineFunction *MF, Register R) {
TT.add(unifyPtrType(Ty), MF, R);
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 77b5421..d2b14d6 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -1841,20 +1841,20 @@ void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV,
// Skip special artificial variable llvm.global.annotations.
if (GV.getName() == "llvm.global.annotations")
return;
- if (GV.hasInitializer() && !isa<UndefValue>(GV.getInitializer())) {
+ Constant *Init = nullptr;
+ if (hasInitializer(&GV)) {
// Deduce element type and store results in Global Registry.
// Result is ignored, because TypedPointerType is not supported
// by llvm IR general logic.
deduceElementTypeHelper(&GV, false);
- Constant *Init = GV.getInitializer();
+ Init = GV.getInitializer();
Type *Ty = isAggrConstForceInt32(Init) ? B.getInt32Ty() : Init->getType();
Constant *Const = isAggrConstForceInt32(Init) ? B.getInt32(1) : Init;
auto *InitInst = B.CreateIntrinsic(Intrinsic::spv_init_global,
{GV.getType(), Ty}, {&GV, Const});
InitInst->setArgOperand(1, Init);
}
- if ((!GV.hasInitializer() || isa<UndefValue>(GV.getInitializer())) &&
- GV.getNumUses() == 0)
+ if (!Init && GV.getNumUses() == 0)
B.CreateIntrinsic(Intrinsic::spv_unref_global, GV.getType(), &GV);
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 0c424477..a06c62e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -721,6 +721,7 @@ Register SPIRVGlobalRegistry::buildGlobalVariable(
}
Reg = MIB->getOperand(0).getReg();
DT.add(GVar, &MIRBuilder.getMF(), Reg);
+ addGlobalObject(GVar, &MIRBuilder.getMF(), Reg);
// Set to Reg the same type as ResVReg has.
auto MRI = MIRBuilder.getMRI();
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
index ec2386fa..528baf5 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -89,6 +89,9 @@ class SPIRVGlobalRegistry {
// Intrinsic::spv_assign_ptr_type instructions.
DenseMap<Value *, CallInst *> AssignPtrTypeInstr;
+  // Maps OpVariable- and OpFunction-related v-regs to their LLVM IR definitions.
+ DenseMap<std::pair<const MachineFunction *, Register>, const Value *> Reg2GO;
+
// Add a new OpTypeXXX instruction without checking for duplicates.
SPIRVType *createSPIRVType(const Type *Type, MachineIRBuilder &MIRBuilder,
SPIRV::AccessQualifier::AccessQualifier AQ =
@@ -151,15 +154,17 @@ public:
return DT.find(F, MF);
}
- void buildDepsGraph(std::vector<SPIRV::DTSortableEntry *> &Graph,
- const SPIRVInstrInfo *TII,
- MachineModuleInfo *MMI = nullptr) {
- DT.buildDepsGraph(Graph, TII, MMI);
- }
-
void setBound(unsigned V) { Bound = V; }
unsigned getBound() { return Bound; }
+ void addGlobalObject(const Value *V, const MachineFunction *MF, Register R) {
+ Reg2GO[std::make_pair(MF, R)] = V;
+ }
+ const Value *getGlobalObject(const MachineFunction *MF, Register R) {
+ auto It = Reg2GO.find(std::make_pair(MF, R));
+ return It == Reg2GO.end() ? nullptr : It->second;
+ }
+
// Add a record to the map of function return pointer types.
void addReturnType(const Function *ArgF, TypedPointerType *DerivedTy) {
FunResPointerTypes[ArgF] = DerivedTy;
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
index bd9e77e..9a140e7 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
@@ -47,6 +47,19 @@ bool SPIRVInstrInfo::isConstantInstr(const MachineInstr &MI) const {
}
}
+bool SPIRVInstrInfo::isSpecConstantInstr(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ case SPIRV::OpSpecConstantTrue:
+ case SPIRV::OpSpecConstantFalse:
+ case SPIRV::OpSpecConstant:
+ case SPIRV::OpSpecConstantComposite:
+ case SPIRV::OpSpecConstantOp:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool SPIRVInstrInfo::isInlineAsmDefInstr(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case SPIRV::OpAsmTargetINTEL:
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h
index 67d2d97..4e5059b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h
@@ -30,6 +30,7 @@ public:
const SPIRVRegisterInfo &getRegisterInfo() const { return RI; }
bool isHeaderInstr(const MachineInstr &MI) const;
bool isConstantInstr(const MachineInstr &MI) const;
+ bool isSpecConstantInstr(const MachineInstr &MI) const;
bool isInlineAsmDefInstr(const MachineInstr &MI) const;
bool isTypeDeclInstr(const MachineInstr &MI) const;
bool isDecorationInstr(const MachineInstr &MI) const;
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 289d5f3..28c9b81 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -1105,6 +1105,7 @@ bool SPIRVInstructionSelector::selectMemOperation(Register ResVReg,
Constant::getNullValue(LLVMArrTy));
Register VarReg = MRI->createGenericVirtualRegister(LLT::scalar(64));
GR.add(GV, GR.CurMF, VarReg);
+ GR.addGlobalObject(GV, GR.CurMF, VarReg);
Result &=
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpVariable))
@@ -2881,6 +2882,14 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
// translated to a `LocalInvocationId` builtin variable
return loadVec3BuiltinInputID(SPIRV::BuiltIn::LocalInvocationId, ResVReg,
ResType, I);
+ case Intrinsic::spv_group_id:
+    // The HLSL SV_GroupId semantic is lowered to the llvm.spv.group.id
+    // intrinsic in LLVM IR; the SPIR-V backend translates it to a
+    // `WorkgroupId` builtin variable.
+ return loadVec3BuiltinInputID(SPIRV::BuiltIn::WorkgroupId, ResVReg, ResType,
+ I);
case Intrinsic::spv_fdot:
return selectFloatDot(ResVReg, ResType, I);
case Intrinsic::spv_udot:
@@ -2906,6 +2915,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
return selectAny(ResVReg, ResType, I);
case Intrinsic::spv_cross:
return selectExtInst(ResVReg, ResType, I, CL::cross, GL::Cross);
+ case Intrinsic::spv_distance:
+ return selectExtInst(ResVReg, ResType, I, CL::distance, GL::Distance);
case Intrinsic::spv_lerp:
return selectExtInst(ResVReg, ResType, I, CL::mix, GL::FMix);
case Intrinsic::spv_length:
@@ -3450,7 +3461,7 @@ bool SPIRVInstructionSelector::selectGlobalValue(
ID = UnnamedGlobalIDs.size();
GlobalIdent = "__unnamed_" + Twine(ID).str();
} else {
- GlobalIdent = GV->getGlobalIdentifier();
+ GlobalIdent = GV->getName();
}
// Behaviour of functions as operands depends on availability of the
@@ -3482,18 +3493,25 @@ bool SPIRVInstructionSelector::selectGlobalValue(
// References to a function via function pointers generate virtual
// registers without a definition. We will resolve it later, during
// module analysis stage.
+ Register ResTypeReg = GR.getSPIRVTypeID(ResType);
MachineRegisterInfo *MRI = MIRBuilder.getMRI();
- Register FuncVReg = MRI->createGenericVirtualRegister(LLT::scalar(64));
- MRI->setRegClass(FuncVReg, &SPIRV::iIDRegClass);
- MachineInstrBuilder MB =
+ Register FuncVReg =
+ MRI->createGenericVirtualRegister(GR.getRegType(ResType));
+ MRI->setRegClass(FuncVReg, &SPIRV::pIDRegClass);
+ MachineInstrBuilder MIB1 =
+ BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpUndef))
+ .addDef(FuncVReg)
+ .addUse(ResTypeReg);
+ MachineInstrBuilder MIB2 =
BuildMI(BB, I, I.getDebugLoc(),
TII.get(SPIRV::OpConstantFunctionPointerINTEL))
.addDef(NewReg)
- .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(ResTypeReg)
.addUse(FuncVReg);
// mapping the function pointer to the used Function
- GR.recordFunctionPointer(&MB.getInstr()->getOperand(2), GVFun);
- return MB.constrainAllUses(TII, TRI, RBI);
+ GR.recordFunctionPointer(&MIB2.getInstr()->getOperand(2), GVFun);
+ return MIB1.constrainAllUses(TII, TRI, RBI) &&
+ MIB2.constrainAllUses(TII, TRI, RBI);
}
return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull))
.addDef(NewReg)
@@ -3506,18 +3524,16 @@ bool SPIRVInstructionSelector::selectGlobalValue(
auto GlobalVar = cast<GlobalVariable>(GV);
assert(GlobalVar->getName() != "llvm.global.annotations");
- bool HasInit = GlobalVar->hasInitializer() &&
- !isa<UndefValue>(GlobalVar->getInitializer());
- // Skip empty declaration for GVs with initilaizers till we get the decl with
+ // Skip empty declaration for GVs with initializers till we get the decl with
// passed initializer.
- if (HasInit && !Init)
+ if (hasInitializer(GlobalVar) && !Init)
return true;
- bool HasLnkTy = GV->getLinkage() != GlobalValue::InternalLinkage;
+ bool HasLnkTy = !GV->hasInternalLinkage() && !GV->hasPrivateLinkage();
SPIRV::LinkageType::LinkageType LnkType =
- (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ GV->isDeclarationForLinker()
? SPIRV::LinkageType::Import
- : (GV->getLinkage() == GlobalValue::LinkOnceODRLinkage &&
+ : (GV->hasLinkOnceODRLinkage() &&
STI.canUseExtension(SPIRV::Extension::SPV_KHR_linkonce_odr)
? SPIRV::LinkageType::LinkOnceODR
: SPIRV::LinkageType::Export);
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index 6371c67..63adf54 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -216,102 +216,262 @@ void SPIRVModuleAnalysis::setBaseInfo(const Module &M) {
}
}
-// Collect MI which defines the register in the given machine function.
-static void collectDefInstr(Register Reg, const MachineFunction *MF,
- SPIRV::ModuleAnalysisInfo *MAI,
- SPIRV::ModuleSectionType MSType,
- bool DoInsert = true) {
- assert(MAI->hasRegisterAlias(MF, Reg) && "Cannot find register alias");
- MachineInstr *MI = MF->getRegInfo().getUniqueVRegDef(Reg);
- assert(MI && "There should be an instruction that defines the register");
- MAI->setSkipEmission(MI);
- if (DoInsert)
- MAI->MS[MSType].push_back(MI);
+// Returns a representation of an instruction as a vector of MachineOperand
+// hash values, see llvm::hash_value(const MachineOperand &MO) for details.
+// This creates a signature of the instruction with the same content
+// that MachineOperand::isIdenticalTo uses for comparison.
+static InstrSignature instrToSignature(const MachineInstr &MI,
+ SPIRV::ModuleAnalysisInfo &MAI,
+ bool UseDefReg) {
+ InstrSignature Signature{MI.getOpcode()};
+ for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ size_t h;
+ if (MO.isReg()) {
+ if (!UseDefReg && MO.isDef())
+ continue;
+ Register RegAlias = MAI.getRegisterAlias(MI.getMF(), MO.getReg());
+ if (!RegAlias.isValid()) {
+ LLVM_DEBUG({
+ dbgs() << "Unexpectedly, no global id found for the operand ";
+ MO.print(dbgs());
+ dbgs() << "\nInstruction: ";
+ MI.print(dbgs());
+ dbgs() << "\n";
+ });
+ report_fatal_error("All v-regs must have been mapped to global id's");
+ }
+ // mimic llvm::hash_value(const MachineOperand &MO)
+ h = hash_combine(MO.getType(), (unsigned)RegAlias, MO.getSubReg(),
+ MO.isDef());
+ } else {
+ h = hash_value(MO);
+ }
+ Signature.push_back(h);
+ }
+ return Signature;
}
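// How these signatures are used downstream: handleTypeDeclOrConstant() keys
// SignatureToGReg by them to assign one global id per distinct instruction,
// and collectOtherInstr() inserts them into a set to drop duplicates. A
// standalone sketch of that get-or-assign discipline, with plain integers
// standing in for MachineOperands (names here are illustrative only):
#include <cstddef>
#include <functional>
#include <map>
#include <vector>

using Signature = std::vector<std::size_t>;

static Signature makeSignature(unsigned Opcode,
                               const std::vector<int> &Operands) {
  Signature Sig{Opcode};
  for (int Op : Operands)
    Sig.push_back(std::hash<int>{}(Op)); // stand-in for hash_value(MO)
  return Sig;
}

static unsigned getOrAssignId(std::map<Signature, unsigned> &SigToId,
                              const Signature &Sig, unsigned &NextId) {
  auto [It, Inserted] = SigToId.try_emplace(Sig, NextId);
  if (Inserted)
    ++NextId; // first time this signature is seen
  return It->second; // identical instructions share one global id
}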
-void SPIRVModuleAnalysis::collectGlobalEntities(
- const std::vector<SPIRV::DTSortableEntry *> &DepsGraph,
- SPIRV::ModuleSectionType MSType,
- std::function<bool(const SPIRV::DTSortableEntry *)> Pred,
- bool UsePreOrder = false) {
- DenseSet<const SPIRV::DTSortableEntry *> Visited;
- for (const auto *E : DepsGraph) {
- std::function<void(const SPIRV::DTSortableEntry *)> RecHoistUtil;
- // NOTE: here we prefer recursive approach over iterative because
- // we don't expect depchains long enough to cause SO.
- RecHoistUtil = [MSType, UsePreOrder, &Visited, &Pred,
- &RecHoistUtil](const SPIRV::DTSortableEntry *E) {
- if (Visited.count(E) || !Pred(E))
- return;
- Visited.insert(E);
-
- // Traversing deps graph in post-order allows us to get rid of
- // register aliases preprocessing.
- // But pre-order is required for correct processing of function
- // declaration and arguments processing.
- if (!UsePreOrder)
- for (auto *S : E->getDeps())
- RecHoistUtil(S);
-
- Register GlobalReg = Register::index2VirtReg(MAI.getNextID());
- bool IsFirst = true;
- for (auto &U : *E) {
- const MachineFunction *MF = U.first;
- Register Reg = U.second;
- MAI.setRegisterAlias(MF, Reg, GlobalReg);
- if (!MF->getRegInfo().getUniqueVRegDef(Reg))
- continue;
- collectDefInstr(Reg, MF, &MAI, MSType, IsFirst);
- IsFirst = false;
- if (E->getIsGV())
- MAI.GlobalVarList.push_back(MF->getRegInfo().getUniqueVRegDef(Reg));
- }
+bool SPIRVModuleAnalysis::isDeclSection(const MachineRegisterInfo &MRI,
+ const MachineInstr &MI) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ case SPIRV::OpTypeForwardPointer:
+ // omit now, collect later
+ return false;
+ case SPIRV::OpVariable:
+ return static_cast<SPIRV::StorageClass::StorageClass>(
+ MI.getOperand(2).getImm()) != SPIRV::StorageClass::Function;
+ case SPIRV::OpFunction:
+ case SPIRV::OpFunctionParameter:
+ return true;
+ }
+ if (GR->hasConstFunPtr() && Opcode == SPIRV::OpUndef) {
+ Register DefReg = MI.getOperand(0).getReg();
+ for (MachineInstr &UseMI : MRI.use_instructions(DefReg)) {
+ if (UseMI.getOpcode() != SPIRV::OpConstantFunctionPointerINTEL)
+ continue;
+ // It's a dummy definition: the FP constant refers to a function,
+ // and that reference is resolved in another way, so skip this definition.
+ assert(UseMI.getOperand(2).isReg() &&
+ UseMI.getOperand(2).getReg() == DefReg);
+ MAI.setSkipEmission(&MI);
+ return false;
+ }
+ }
+ return TII->isTypeDeclInstr(MI) || TII->isConstantInstr(MI) ||
+ TII->isInlineAsmDefInstr(MI);
+}
- if (UsePreOrder)
- for (auto *S : E->getDeps())
- RecHoistUtil(S);
- };
- RecHoistUtil(E);
+// This is a special case of a function pointer referring to a possibly
+// forward function declaration. The operand is a dummy OpUndef that
+// requires special treatment.
+void SPIRVModuleAnalysis::visitFunPtrUse(
+ Register OpReg, InstrGRegsMap &SignatureToGReg,
+ std::map<const Value *, unsigned> &GlobalToGReg, const MachineFunction *MF,
+ const MachineInstr &MI) {
+ const MachineOperand *OpFunDef =
+ GR->getFunctionDefinitionByUse(&MI.getOperand(2));
+ assert(OpFunDef && OpFunDef->isReg());
+ // find the actual function definition and number it globally in advance
+ const MachineInstr *OpDefMI = OpFunDef->getParent();
+ assert(OpDefMI && OpDefMI->getOpcode() == SPIRV::OpFunction);
+ const MachineFunction *FunDefMF = OpDefMI->getParent()->getParent();
+ const MachineRegisterInfo &FunDefMRI = FunDefMF->getRegInfo();
+ do {
+ visitDecl(FunDefMRI, SignatureToGReg, GlobalToGReg, FunDefMF, *OpDefMI);
+ OpDefMI = OpDefMI->getNextNode();
+ } while (OpDefMI && (OpDefMI->getOpcode() == SPIRV::OpFunction ||
+ OpDefMI->getOpcode() == SPIRV::OpFunctionParameter));
+ // associate the function pointer with the newly assigned global number
+ Register GlobalFunDefReg = MAI.getRegisterAlias(FunDefMF, OpFunDef->getReg());
+ assert(GlobalFunDefReg.isValid() &&
+ "Function definition must refer to a global register");
+ MAI.setRegisterAlias(MF, OpReg, GlobalFunDefReg);
+}
+
+// Depth-first recursive traversal of dependencies. Repeated visits are guarded
+// by MAI.hasRegisterAlias().
+void SPIRVModuleAnalysis::visitDecl(
+ const MachineRegisterInfo &MRI, InstrGRegsMap &SignatureToGReg,
+ std::map<const Value *, unsigned> &GlobalToGReg, const MachineFunction *MF,
+ const MachineInstr &MI) {
+ unsigned Opcode = MI.getOpcode();
+ DenseSet<Register> Deps;
+
+ // Process each operand of the instruction to resolve dependencies
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ Register OpReg = MO.getReg();
+ // Handle the function-pointer special case
+ if (Opcode == SPIRV::OpConstantFunctionPointerINTEL &&
+ MRI.getRegClass(OpReg) == &SPIRV::pIDRegClass) {
+ visitFunPtrUse(OpReg, SignatureToGReg, GlobalToGReg, MF, MI);
+ continue;
+ }
+ // Skip already processed instructions
+ if (MAI.hasRegisterAlias(MF, MO.getReg()))
+ continue;
+ // Recursively visit dependencies
+ if (const MachineInstr *OpDefMI = MRI.getUniqueVRegDef(OpReg)) {
+ if (isDeclSection(MRI, *OpDefMI))
+ visitDecl(MRI, SignatureToGReg, GlobalToGReg, MF, *OpDefMI);
+ continue;
+ }
+ // Handle the unexpected case of no unique definition for the SPIR-V
+ // instruction
+ LLVM_DEBUG({
+ dbgs() << "Unexpectedly, no unique definition for the operand ";
+ MO.print(dbgs());
+ dbgs() << "\nInstruction: ";
+ MI.print(dbgs());
+ dbgs() << "\n";
+ });
+ report_fatal_error(
+ "No unique definition is found for the virtual register");
}
+
+ Register GReg;
+ bool IsFunDef = false;
+ if (TII->isSpecConstantInstr(MI)) {
+ GReg = Register::index2VirtReg(MAI.getNextID());
+ MAI.MS[SPIRV::MB_TypeConstVars].push_back(&MI);
+ } else if (Opcode == SPIRV::OpFunction ||
+ Opcode == SPIRV::OpFunctionParameter) {
+ GReg = handleFunctionOrParameter(MF, MI, GlobalToGReg, IsFunDef);
+ } else if (TII->isTypeDeclInstr(MI) || TII->isConstantInstr(MI) ||
+ TII->isInlineAsmDefInstr(MI)) {
+ GReg = handleTypeDeclOrConstant(MI, SignatureToGReg);
+ } else if (Opcode == SPIRV::OpVariable) {
+ GReg = handleVariable(MF, MI, GlobalToGReg);
+ } else {
+ LLVM_DEBUG({
+ dbgs() << "\nInstruction: ";
+ MI.print(dbgs());
+ dbgs() << "\n";
+ });
+ llvm_unreachable("Unexpected instruction is visited");
+ }
+ MAI.setRegisterAlias(MF, MI.getOperand(0).getReg(), GReg);
+ if (!IsFunDef)
+ MAI.setSkipEmission(&MI);
}
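// visitDecl() above is a classic memoized post-order DFS: operand
// definitions are resolved before the instruction itself, with
// MAI.hasRegisterAlias() doubling as the "visited" set. A generic sketch of
// the same shape (types and names are illustrative, not part of the patch):
#include <functional>
#include <set>
#include <vector>

struct Node {
  int Id;
  std::vector<Node *> Deps;
};

static void visitNode(Node *N, std::set<int> &Assigned,
                      const std::function<void(Node *)> &Emit) {
  if (Assigned.count(N->Id)) // alias already set -> already processed
    return;
  for (Node *D : N->Deps)    // resolve dependencies first
    visitNode(D, Assigned, Emit);
  Assigned.insert(N->Id);    // plays the role of setRegisterAlias()
  Emit(N);                   // plays the role of the handle*() dispatch
}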
-// The function initializes global register alias table for types, consts,
-// global vars and func decls and collects these instruction for output
-// at module level. Also it collects explicit OpExtension/OpCapability
-// instructions.
-void SPIRVModuleAnalysis::processDefInstrs(const Module &M) {
- std::vector<SPIRV::DTSortableEntry *> DepsGraph;
+Register SPIRVModuleAnalysis::handleFunctionOrParameter(
+ const MachineFunction *MF, const MachineInstr &MI,
+ std::map<const Value *, unsigned> &GlobalToGReg, bool &IsFunDef) {
+ const Value *GObj = GR->getGlobalObject(MF, MI.getOperand(0).getReg());
+ assert(GObj && "Unregistered global definition");
+ const Function *F = dyn_cast<Function>(GObj);
+ if (!F)
+ F = dyn_cast<Argument>(GObj)->getParent();
+ assert(F && "Expected a reference to a function or an argument");
+ IsFunDef = !F->isDeclaration();
+ auto It = GlobalToGReg.find(GObj);
+ if (It != GlobalToGReg.end())
+ return It->second;
+ Register GReg = Register::index2VirtReg(MAI.getNextID());
+ GlobalToGReg[GObj] = GReg;
+ if (!IsFunDef)
+ MAI.MS[SPIRV::MB_ExtFuncDecls].push_back(&MI);
+ return GReg;
+}
- GR->buildDepsGraph(DepsGraph, TII, SPVDumpDeps ? MMI : nullptr);
+Register
+SPIRVModuleAnalysis::handleTypeDeclOrConstant(const MachineInstr &MI,
+ InstrGRegsMap &SignatureToGReg) {
+ InstrSignature MISign = instrToSignature(MI, MAI, false);
+ auto It = SignatureToGReg.find(MISign);
+ if (It != SignatureToGReg.end())
+ return It->second;
+ Register GReg = Register::index2VirtReg(MAI.getNextID());
+ SignatureToGReg[MISign] = GReg;
+ MAI.MS[SPIRV::MB_TypeConstVars].push_back(&MI);
+ return GReg;
+}
- collectGlobalEntities(
- DepsGraph, SPIRV::MB_TypeConstVars,
- [](const SPIRV::DTSortableEntry *E) { return !E->getIsFunc(); });
+Register SPIRVModuleAnalysis::handleVariable(
+ const MachineFunction *MF, const MachineInstr &MI,
+ std::map<const Value *, unsigned> &GlobalToGReg) {
+ MAI.GlobalVarList.push_back(&MI);
+ const Value *GObj = GR->getGlobalObject(MF, MI.getOperand(0).getReg());
+ assert(GObj && "Unregistered global definition");
+ auto It = GlobalToGReg.find(GObj);
+ if (It != GlobalToGReg.end())
+ return It->second;
+ Register GReg = Register::index2VirtReg(MAI.getNextID());
+ GlobalToGReg[GObj] = GReg;
+ MAI.MS[SPIRV::MB_TypeConstVars].push_back(&MI);
+ return GReg;
+}
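// handleFunctionOrParameter() and handleVariable() follow the same
// get-or-create discipline over GlobalToGReg; factored out as a generic
// sketch (the helper name is illustrative, not part of the patch):
#include <map>

template <typename KeyT, typename MakeFn>
static unsigned getOrCreateId(std::map<KeyT, unsigned> &Map, const KeyT &Key,
                              MakeFn MakeNewId) {
  auto It = Map.find(Key);
  if (It != Map.end())
    return It->second; // reuse the id assigned on first sight
  unsigned Id = MakeNewId();
  Map[Key] = Id;
  return Id;
}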
+void SPIRVModuleAnalysis::collectDeclarations(const Module &M) {
+ InstrGRegsMap SignatureToGReg;
+ std::map<const Value *, unsigned> GlobalToGReg;
for (auto F = M.begin(), E = M.end(); F != E; ++F) {
MachineFunction *MF = MMI->getMachineFunction(*F);
if (!MF)
continue;
- // Iterate through and collect OpExtension/OpCapability instructions.
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned PastHeader = 0;
for (MachineBasicBlock &MBB : *MF) {
for (MachineInstr &MI : MBB) {
- if (MI.getOpcode() == SPIRV::OpExtension) {
- // Here, OpExtension just has a single enum operand, not a string.
- auto Ext = SPIRV::Extension::Extension(MI.getOperand(0).getImm());
- MAI.Reqs.addExtension(Ext);
+ if (MI.getNumOperands() == 0)
+ continue;
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == SPIRV::OpFunction) {
+ if (PastHeader == 0) {
+ PastHeader = 1;
+ continue;
+ }
+ } else if (Opcode == SPIRV::OpFunctionParameter) {
+ if (PastHeader < 2)
+ continue;
+ } else if (PastHeader > 0) {
+ PastHeader = 2;
+ }
+
+ const MachineOperand &DefMO = MI.getOperand(0);
+ switch (Opcode) {
+ case SPIRV::OpExtension:
+ MAI.Reqs.addExtension(SPIRV::Extension::Extension(DefMO.getImm()));
MAI.setSkipEmission(&MI);
- } else if (MI.getOpcode() == SPIRV::OpCapability) {
- auto Cap = SPIRV::Capability::Capability(MI.getOperand(0).getImm());
- MAI.Reqs.addCapability(Cap);
+ break;
+ case SPIRV::OpCapability:
+ MAI.Reqs.addCapability(SPIRV::Capability::Capability(DefMO.getImm()));
MAI.setSkipEmission(&MI);
+ if (PastHeader > 0)
+ PastHeader = 2;
+ break;
+ default:
+ if (DefMO.isReg() && isDeclSection(MRI, MI) &&
+ !MAI.hasRegisterAlias(MF, DefMO.getReg()))
+ visitDecl(MRI, SignatureToGReg, GlobalToGReg, MF, MI);
}
}
}
}
-
- collectGlobalEntities(
- DepsGraph, SPIRV::MB_ExtFuncDecls,
- [](const SPIRV::DTSortableEntry *E) { return E->getIsFunc(); }, true);
}
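// PastHeader above is a small three-state machine per machine function; the
// states, as inferred from the control flow (the naming here is mine):
enum class HeaderState {
  BeforeFunc = 0, // this MF's own OpFunction not reached yet; skip it once
  InHeader = 1,   // OpFunction seen; its OpFunctionParameters are skipped too
  InBody = 2      // a body instruction was seen; from here on, OpFunction and
                  // OpFunctionParameter are treated as ordinary declarations
};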
// Look for IDs declared with Import linkage, and map the corresponding function
@@ -342,58 +502,6 @@ void SPIRVModuleAnalysis::collectFuncNames(MachineInstr &MI,
}
}
-// References to a function via function pointers generate virtual
-// registers without a definition. We are able to resolve this
-// reference using Globar Register info into an OpFunction instruction
-// and replace dummy operands by the corresponding global register references.
-void SPIRVModuleAnalysis::collectFuncPtrs() {
- for (auto &MI : MAI.MS[SPIRV::MB_TypeConstVars])
- if (MI->getOpcode() == SPIRV::OpConstantFunctionPointerINTEL)
- collectFuncPtrs(MI);
-}
-
-void SPIRVModuleAnalysis::collectFuncPtrs(MachineInstr *MI) {
- const MachineOperand *FunUse = &MI->getOperand(2);
- if (const MachineOperand *FunDef = GR->getFunctionDefinitionByUse(FunUse)) {
- const MachineInstr *FunDefMI = FunDef->getParent();
- assert(FunDefMI->getOpcode() == SPIRV::OpFunction &&
- "Constant function pointer must refer to function definition");
- Register FunDefReg = FunDef->getReg();
- Register GlobalFunDefReg =
- MAI.getRegisterAlias(FunDefMI->getMF(), FunDefReg);
- assert(GlobalFunDefReg.isValid() &&
- "Function definition must refer to a global register");
- Register FunPtrReg = FunUse->getReg();
- MAI.setRegisterAlias(MI->getMF(), FunPtrReg, GlobalFunDefReg);
- }
-}
-
-using InstrSignature = SmallVector<size_t>;
-using InstrTraces = std::set<InstrSignature>;
-
-// Returns a representation of an instruction as a vector of MachineOperand
-// hash values, see llvm::hash_value(const MachineOperand &MO) for details.
-// This creates a signature of the instruction with the same content
-// that MachineOperand::isIdenticalTo uses for comparison.
-static InstrSignature instrToSignature(MachineInstr &MI,
- SPIRV::ModuleAnalysisInfo &MAI) {
- InstrSignature Signature;
- for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
- const MachineOperand &MO = MI.getOperand(i);
- size_t h;
- if (MO.isReg()) {
- Register RegAlias = MAI.getRegisterAlias(MI.getMF(), MO.getReg());
- // mimic llvm::hash_value(const MachineOperand &MO)
- h = hash_combine(MO.getType(), (unsigned)RegAlias, MO.getSubReg(),
- MO.isDef());
- } else {
- h = hash_value(MO);
- }
- Signature.push_back(h);
- }
- return Signature;
-}
-
// Collect the given instruction in the specified MS. We assume global register
// numbering has already occurred by this point. We can directly compare reg
// arguments when detecting duplicates.
@@ -401,7 +509,7 @@ static void collectOtherInstr(MachineInstr &MI, SPIRV::ModuleAnalysisInfo &MAI,
SPIRV::ModuleSectionType MSType, InstrTraces &IS,
bool Append = true) {
MAI.setSkipEmission(&MI);
- InstrSignature MISign = instrToSignature(MI, MAI);
+ InstrSignature MISign = instrToSignature(MI, MAI, true);
auto FoundMI = IS.insert(MISign);
if (!FoundMI.second)
return; // insert failed, so we found a duplicate; don't add it to MAI.MS
@@ -465,7 +573,7 @@ void SPIRVModuleAnalysis::processOtherInstrs(const Module &M) {
// Number registers in all functions globally from 0 onwards and store
// the result in global register alias table. Some registers are already
-// numbered in collectGlobalEntities.
+// numbered by collectDeclarations.
void SPIRVModuleAnalysis::numberRegistersGlobally(const Module &M) {
for (auto F = M.begin(), E = M.end(); F != E; ++F) {
if ((*F).isDeclaration())
@@ -1835,15 +1943,11 @@ bool SPIRVModuleAnalysis::runOnModule(Module &M) {
// Process type/const/global var/func decl instructions, number their
// destination registers from 0 to N, collect Extensions and Capabilities.
- processDefInstrs(M);
+ collectDeclarations(M);
// Number rest of registers from N+1 onwards.
numberRegistersGlobally(M);
- // Update references to OpFunction instructions to use Global Registers
- if (GR->hasConstFunPtr())
- collectFuncPtrs();
-
// Collect OpName, OpEntryPoint, OpDecorate etc, process other instructions.
processOtherInstrs(M);
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
index ee2aaf1..79b5444 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
@@ -124,7 +124,7 @@ public:
const Capability::Capability IfPresent);
};
-using InstrList = SmallVector<MachineInstr *>;
+using InstrList = SmallVector<const MachineInstr *>;
// Maps a local register to the corresponding global alias.
using LocalToGlobalRegTable = std::map<Register, Register>;
using RegisterAliasMapTy =
@@ -142,12 +142,12 @@ struct ModuleAnalysisInfo {
// Maps ExtInstSet to corresponding ID register.
DenseMap<unsigned, Register> ExtInstSetMap;
// Contains the list of all global OpVariables in the module.
- SmallVector<MachineInstr *, 4> GlobalVarList;
+ SmallVector<const MachineInstr *, 4> GlobalVarList;
// Maps functions to corresponding function ID registers.
DenseMap<const Function *, Register> FuncMap;
// The set contains machine instructions which are necessary
// for correct MIR but will not be emitted in function bodies.
- DenseSet<MachineInstr *> InstrsToDelete;
+ DenseSet<const MachineInstr *> InstrsToDelete;
// The table contains global aliases of local registers for each machine
// function. The aliases are used to substitute local registers during
// code emission.
@@ -167,7 +167,7 @@ struct ModuleAnalysisInfo {
}
Register getExtInstSetReg(unsigned SetNum) { return ExtInstSetMap[SetNum]; }
InstrList &getMSInstrs(unsigned MSType) { return MS[MSType]; }
- void setSkipEmission(MachineInstr *MI) { InstrsToDelete.insert(MI); }
+ void setSkipEmission(const MachineInstr *MI) { InstrsToDelete.insert(MI); }
bool getSkipEmission(const MachineInstr *MI) {
return InstrsToDelete.contains(MI);
}
@@ -204,6 +204,10 @@ struct ModuleAnalysisInfo {
};
} // namespace SPIRV
+using InstrSignature = SmallVector<size_t>;
+using InstrTraces = std::set<InstrSignature>;
+using InstrGRegsMap = std::map<SmallVector<size_t>, unsigned>;
+
struct SPIRVModuleAnalysis : public ModulePass {
static char ID;
@@ -216,17 +220,27 @@ public:
private:
void setBaseInfo(const Module &M);
- void collectGlobalEntities(
- const std::vector<SPIRV::DTSortableEntry *> &DepsGraph,
- SPIRV::ModuleSectionType MSType,
- std::function<bool(const SPIRV::DTSortableEntry *)> Pred,
- bool UsePreOrder);
- void processDefInstrs(const Module &M);
void collectFuncNames(MachineInstr &MI, const Function *F);
void processOtherInstrs(const Module &M);
void numberRegistersGlobally(const Module &M);
- void collectFuncPtrs();
- void collectFuncPtrs(MachineInstr *MI);
+
+ // Analyze dependencies to collect module-scope definitions.
+ void collectDeclarations(const Module &M);
+ void visitDecl(const MachineRegisterInfo &MRI, InstrGRegsMap &SignatureToGReg,
+ std::map<const Value *, unsigned> &GlobalToGReg,
+ const MachineFunction *MF, const MachineInstr &MI);
+ Register handleVariable(const MachineFunction *MF, const MachineInstr &MI,
+ std::map<const Value *, unsigned> &GlobalToGReg);
+ Register handleTypeDeclOrConstant(const MachineInstr &MI,
+ InstrGRegsMap &SignatureToGReg);
+ Register
+ handleFunctionOrParameter(const MachineFunction *MF, const MachineInstr &MI,
+ std::map<const Value *, unsigned> &GlobalToGReg,
+ bool &IsFunDef);
+ void visitFunPtrUse(Register OpReg, InstrGRegsMap &SignatureToGReg,
+ std::map<const Value *, unsigned> &GlobalToGReg,
+ const MachineFunction *MF, const MachineInstr &MI);
+ bool isDeclSection(const MachineRegisterInfo &MRI, const MachineInstr &MI);
const SPIRVSubtarget *ST;
SPIRVGlobalRegistry *GR;
diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
index 8357c30..5b4c849 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
@@ -58,9 +58,10 @@ addConstantsToTrack(MachineFunction &MF, SPIRVGlobalRegistry *GR,
->getValue());
if (auto *GV = dyn_cast<GlobalValue>(Const)) {
Register Reg = GR->find(GV, &MF);
- if (!Reg.isValid())
+ if (!Reg.isValid()) {
GR->add(GV, &MF, SrcReg);
- else
+ GR->addGlobalObject(GV, &MF, SrcReg);
+ } else
RegsAlreadyAddedToDT[&MI] = Reg;
} else {
Register Reg = GR->find(Const, &MF);
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
index da2e24c..60649ea 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -17,6 +17,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/TypedPointerType.h"
#include <queue>
@@ -236,6 +237,10 @@ Type *parseBasicTypeName(StringRef &TypeName, LLVMContext &Ctx);
// Returns true if the function was changed.
bool sortBlocks(Function &F);
+inline bool hasInitializer(const GlobalVariable *GV) {
+ return GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer());
+}
+
// True if this is an instance of TypedPointerType.
inline bool isTypedPointerTy(const Type *T) {
return T && T->getTypeID() == Type::TypedPointerTyID;
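// hasInitializer() above deliberately treats an `undef` initializer as "no
// real initializer": such a global is handled as if it had no initializer
// at all. A small self-contained illustration using only core LLVM IR APIs
// (the demo function itself is hypothetical):
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static bool demoHasInitializer(LLVMContext &Ctx) {
  Module M("m", Ctx);
  auto *I32 = Type::getInt32Ty(Ctx);
  auto *GV = new GlobalVariable(M, I32, /*isConstant=*/false,
                                GlobalValue::ExternalLinkage,
                                UndefValue::get(I32), "g");
  // GV->hasInitializer() is true, but the initializer is undef, so the
  // helper reports false.
  return GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer());
}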
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index 06d7d42..16e7d05 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -29,19 +29,14 @@ SystemZMCAsmInfoGOFF::SystemZMCAsmInfoGOFF(const Triple &TT) {
AllowAtInName = true;
AllowAtAtStartOfIdentifier = true;
AllowDollarAtStartOfIdentifier = true;
- AllowHashAtStartOfIdentifier = true;
AssemblerDialect = AD_HLASM;
CalleeSaveStackSlotSize = 8;
CodePointerSize = 8;
CommentString = "*";
- DotIsPC = false;
- EmitGNUAsmStartIndentationMarker = false;
- EmitLabelsInUpperCase = true;
ExceptionsType = ExceptionHandling::ZOS;
+ IsHLASM = true;
IsLittleEndian = false;
MaxInstLength = 6;
- RestrictCommentStringToStartOfStatement = true;
- StarIsPC = true;
SupportsDebugInformation = true;
}
diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp
index c0985f3..d5365f3 100644
--- a/llvm/lib/Target/TargetMachine.cpp
+++ b/llvm/lib/Target/TargetMachine.cpp
@@ -204,7 +204,7 @@ bool TargetMachine::shouldAssumeDSOLocal(const GlobalValue *GV) const {
// don't assume the variables to be DSO local unless we actually know
// that for sure. This only has to be done for variables; for functions
// the linker can insert thunks for calling functions from another DLL.
- if (TT.isWindowsGNUEnvironment() && GV->isDeclarationForLinker() &&
+ if (TT.isOSCygMing() && GV->isDeclarationForLinker() &&
isa<GlobalVariable>(GV))
return false;
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
index b67c573..abe0cc6 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
@@ -140,8 +140,8 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI,
case X86::VCMPPSZ128rmik: case X86::VCMPPSZ128rrik:
case X86::VCMPPSZ256rmik: case X86::VCMPPSZ256rrik:
case X86::VCMPPSZrmik: case X86::VCMPPSZrrik:
- case X86::VCMPSDZrmi_Intk: case X86::VCMPSDZrri_Intk:
- case X86::VCMPSSZrmi_Intk: case X86::VCMPSSZrri_Intk:
+ case X86::VCMPSDZrmik_Int: case X86::VCMPSDZrrik_Int:
+ case X86::VCMPSSZrmik_Int: case X86::VCMPSSZrrik_Int:
case X86::VCMPPDZ128rmbi: case X86::VCMPPDZ128rmbik:
case X86::VCMPPDZ256rmbi: case X86::VCMPPDZ256rmbik:
case X86::VCMPPDZrmbi: case X86::VCMPPDZrmbik:
@@ -150,8 +150,8 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI,
case X86::VCMPPSZrmbi: case X86::VCMPPSZrmbik:
case X86::VCMPPDZrrib: case X86::VCMPPDZrribk:
case X86::VCMPPSZrrib: case X86::VCMPPSZrribk:
- case X86::VCMPSDZrrib_Int: case X86::VCMPSDZrrib_Intk:
- case X86::VCMPSSZrrib_Int: case X86::VCMPSSZrrib_Intk:
+ case X86::VCMPSDZrrib_Int: case X86::VCMPSDZrribk_Int:
+ case X86::VCMPSSZrrib_Int: case X86::VCMPSSZrribk_Int:
case X86::VCMPPHZ128rmi: case X86::VCMPPHZ128rri:
case X86::VCMPPHZ256rmi: case X86::VCMPPHZ256rri:
case X86::VCMPPHZrmi: case X86::VCMPPHZrri:
@@ -160,12 +160,12 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI,
case X86::VCMPPHZ128rmik: case X86::VCMPPHZ128rrik:
case X86::VCMPPHZ256rmik: case X86::VCMPPHZ256rrik:
case X86::VCMPPHZrmik: case X86::VCMPPHZrrik:
- case X86::VCMPSHZrmi_Intk: case X86::VCMPSHZrri_Intk:
+ case X86::VCMPSHZrmik_Int: case X86::VCMPSHZrrik_Int:
case X86::VCMPPHZ128rmbi: case X86::VCMPPHZ128rmbik:
case X86::VCMPPHZ256rmbi: case X86::VCMPPHZ256rmbik:
case X86::VCMPPHZrmbi: case X86::VCMPPHZrmbik:
case X86::VCMPPHZrrib: case X86::VCMPPHZrribk:
- case X86::VCMPSHZrrib_Int: case X86::VCMPSHZrrib_Intk:
+ case X86::VCMPSHZrrib_Int: case X86::VCMPSHZrribk_Int:
case X86::VCMPPBF16Z128rmi: case X86::VCMPPBF16Z128rri:
case X86::VCMPPBF16Z256rmi: case X86::VCMPPBF16Z256rri:
case X86::VCMPPBF16Zrmi: case X86::VCMPPBF16Zrri:
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
index 9f8bc57..681d0da 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -40,6 +40,17 @@ using namespace llvm;
CASE_MASK_INS_COMMON(Inst, Suffix, src) \
CASE_MASKZ_INS_COMMON(Inst, Suffix, src)
+#define CASE_MASK_INS_COMMON_INT(Inst, Suffix, src) \
+ case X86::V##Inst##Suffix##src##k_Int:
+
+#define CASE_MASKZ_INS_COMMON_INT(Inst, Suffix, src) \
+ case X86::V##Inst##Suffix##src##kz_Int:
+
+#define CASE_AVX512_INS_COMMON_INT(Inst, Suffix, src) \
+ CASE_AVX_INS_COMMON(Inst, Suffix, src##_Int) \
+ CASE_MASK_INS_COMMON_INT(Inst, Suffix, src) \
+ CASE_MASKZ_INS_COMMON_INT(Inst, Suffix, src)
+
#define CASE_FPCLASS_PACKED(Inst, src) \
CASE_AVX_INS_COMMON(Inst, Z, src##i) \
CASE_AVX_INS_COMMON(Inst, Z256, src##i) \
@@ -196,8 +207,8 @@ using namespace llvm;
CASE_AVX_INS_COMMON(Inst##SS, , r_Int) \
CASE_AVX_INS_COMMON(Inst##SD, Z, r) \
CASE_AVX_INS_COMMON(Inst##SS, Z, r) \
- CASE_AVX512_INS_COMMON(Inst##SD, Z, r_Int) \
- CASE_AVX512_INS_COMMON(Inst##SS, Z, r_Int)
+ CASE_AVX512_INS_COMMON_INT(Inst##SD, Z, r) \
+ CASE_AVX512_INS_COMMON_INT(Inst##SS, Z, r)
#define CASE_FMA_SCALAR_MEM(Inst) \
CASE_AVX_INS_COMMON(Inst##SD, , m) \
@@ -206,8 +217,8 @@ using namespace llvm;
CASE_AVX_INS_COMMON(Inst##SS, , m_Int) \
CASE_AVX_INS_COMMON(Inst##SD, Z, m) \
CASE_AVX_INS_COMMON(Inst##SS, Z, m) \
- CASE_AVX512_INS_COMMON(Inst##SD, Z, m_Int) \
- CASE_AVX512_INS_COMMON(Inst##SS, Z, m_Int)
+ CASE_AVX512_INS_COMMON_INT(Inst##SD, Z, m) \
+ CASE_AVX512_INS_COMMON_INT(Inst##SS, Z, m)
#define CASE_FMA4(Inst, suf) \
CASE_AVX_INS_COMMON(Inst, 4, suf) \
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index fafcc73..01e2d4ac 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -277,8 +277,8 @@ void X86InstPrinterCommon::printCMPMnemonic(const MCInst *MI, bool IsVCmp,
case X86::VCMPSDrmi_Int: case X86::VCMPSDrri_Int:
case X86::VCMPSDZrmi: case X86::VCMPSDZrri:
case X86::VCMPSDZrmi_Int: case X86::VCMPSDZrri_Int:
- case X86::VCMPSDZrmi_Intk: case X86::VCMPSDZrri_Intk:
- case X86::VCMPSDZrrib_Int: case X86::VCMPSDZrrib_Intk:
+ case X86::VCMPSDZrmik_Int: case X86::VCMPSDZrrik_Int:
+ case X86::VCMPSDZrrib_Int: case X86::VCMPSDZrribk_Int:
OS << "sd\t";
break;
case X86::CMPSSrmi: case X86::CMPSSrri:
@@ -287,8 +287,8 @@ void X86InstPrinterCommon::printCMPMnemonic(const MCInst *MI, bool IsVCmp,
case X86::VCMPSSrmi_Int: case X86::VCMPSSrri_Int:
case X86::VCMPSSZrmi: case X86::VCMPSSZrri:
case X86::VCMPSSZrmi_Int: case X86::VCMPSSZrri_Int:
- case X86::VCMPSSZrmi_Intk: case X86::VCMPSSZrri_Intk:
- case X86::VCMPSSZrrib_Int: case X86::VCMPSSZrrib_Intk:
+ case X86::VCMPSSZrmik_Int: case X86::VCMPSSZrrik_Int:
+ case X86::VCMPSSZrrib_Int: case X86::VCMPSSZrribk_Int:
OS << "ss\t";
break;
case X86::VCMPPHZ128rmi: case X86::VCMPPHZ128rri:
@@ -305,8 +305,8 @@ void X86InstPrinterCommon::printCMPMnemonic(const MCInst *MI, bool IsVCmp,
break;
case X86::VCMPSHZrmi: case X86::VCMPSHZrri:
case X86::VCMPSHZrmi_Int: case X86::VCMPSHZrri_Int:
- case X86::VCMPSHZrrib_Int: case X86::VCMPSHZrrib_Intk:
- case X86::VCMPSHZrmi_Intk: case X86::VCMPSHZrri_Intk:
+ case X86::VCMPSHZrrib_Int: case X86::VCMPSHZrribk_Int:
+ case X86::VCMPSHZrmik_Int: case X86::VCMPSHZrrik_Int:
OS << "sh\t";
break;
case X86::VCMPPBF16Z128rmi: case X86::VCMPPBF16Z128rri:
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
index 6800926..c26dc2c 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -119,8 +119,8 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS
case X86::VCMPPSZ128rmik: case X86::VCMPPSZ128rrik:
case X86::VCMPPSZ256rmik: case X86::VCMPPSZ256rrik:
case X86::VCMPPSZrmik: case X86::VCMPPSZrrik:
- case X86::VCMPSDZrmi_Intk: case X86::VCMPSDZrri_Intk:
- case X86::VCMPSSZrmi_Intk: case X86::VCMPSSZrri_Intk:
+ case X86::VCMPSDZrmik_Int: case X86::VCMPSDZrrik_Int:
+ case X86::VCMPSSZrmik_Int: case X86::VCMPSSZrrik_Int:
case X86::VCMPPDZ128rmbi: case X86::VCMPPDZ128rmbik:
case X86::VCMPPDZ256rmbi: case X86::VCMPPDZ256rmbik:
case X86::VCMPPDZrmbi: case X86::VCMPPDZrmbik:
@@ -129,8 +129,8 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS
case X86::VCMPPSZrmbi: case X86::VCMPPSZrmbik:
case X86::VCMPPDZrrib: case X86::VCMPPDZrribk:
case X86::VCMPPSZrrib: case X86::VCMPPSZrribk:
- case X86::VCMPSDZrrib_Int: case X86::VCMPSDZrrib_Intk:
- case X86::VCMPSSZrrib_Int: case X86::VCMPSSZrrib_Intk:
+ case X86::VCMPSDZrrib_Int: case X86::VCMPSDZrribk_Int:
+ case X86::VCMPSSZrrib_Int: case X86::VCMPSSZrribk_Int:
case X86::VCMPPHZ128rmi: case X86::VCMPPHZ128rri:
case X86::VCMPPHZ256rmi: case X86::VCMPPHZ256rri:
case X86::VCMPPHZrmi: case X86::VCMPPHZrri:
@@ -139,12 +139,12 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS
case X86::VCMPPHZ128rmik: case X86::VCMPPHZ128rrik:
case X86::VCMPPHZ256rmik: case X86::VCMPPHZ256rrik:
case X86::VCMPPHZrmik: case X86::VCMPPHZrrik:
- case X86::VCMPSHZrmi_Intk: case X86::VCMPSHZrri_Intk:
+ case X86::VCMPSHZrmik_Int: case X86::VCMPSHZrrik_Int:
case X86::VCMPPHZ128rmbi: case X86::VCMPPHZ128rmbik:
case X86::VCMPPHZ256rmbi: case X86::VCMPPHZ256rmbik:
case X86::VCMPPHZrmbi: case X86::VCMPPHZrmbik:
case X86::VCMPPHZrrib: case X86::VCMPPHZrribk:
- case X86::VCMPSHZrrib_Int: case X86::VCMPSHZrrib_Intk:
+ case X86::VCMPSHZrrib_Int: case X86::VCMPSHZrribk_Int:
case X86::VCMPPBF16Z128rmi: case X86::VCMPPBF16Z128rri:
case X86::VCMPPBF16Z256rmi: case X86::VCMPPBF16Z256rri:
case X86::VCMPPBF16Zrmi: case X86::VCMPPBF16Zrri:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3b260a8..6b0eb38 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -94,7 +94,7 @@ static cl::opt<int> BrMergingCcmpBias(
static cl::opt<bool>
WidenShift("x86-widen-shift", cl::init(true),
- cl::desc("Replacte narrow shifts with wider shifts."),
+ cl::desc("Replace narrow shifts with wider shifts."),
cl::Hidden);
static cl::opt<int> BrMergingLikelyBias(
@@ -341,8 +341,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
if (Subtarget.hasAVX10_2()) {
- setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Legal);
- setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v2i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v2i32, Custom);
+ for (MVT VT : {MVT::i32, MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64,
+ MVT::v4i64}) {
+ setOperationAction(ISD::FP_TO_UINT_SAT, VT, Legal);
+ setOperationAction(ISD::FP_TO_SINT_SAT, VT, Legal);
+ }
+ if (Subtarget.hasAVX10_2_512()) {
+ setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v8i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v8i64, Legal);
+ }
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Legal);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Legal);
@@ -623,6 +632,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMAXNUM, VT, Action);
setOperationAction(ISD::FMINIMUM, VT, Action);
setOperationAction(ISD::FMAXIMUM, VT, Action);
+ setOperationAction(ISD::FMINIMUMNUM, VT, Action);
+ setOperationAction(ISD::FMAXIMUMNUM, VT, Action);
setOperationAction(ISD::FSIN, VT, Action);
setOperationAction(ISD::FCOS, VT, Action);
setOperationAction(ISD::FSINCOS, VT, Action);
@@ -1066,6 +1077,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMAXIMUM, MVT::f32, Custom);
setOperationAction(ISD::FMINIMUM, MVT::f32, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, MVT::f32, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, MVT::f32, Custom);
setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
setOperationAction(ISD::FABS, MVT::v4f32, Custom);
@@ -1108,6 +1121,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::f64, MVT::v4f32, MVT::v2f64 }) {
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
}
for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
@@ -1473,6 +1488,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
setOperationAction(ISD::FCANONICALIZE, VT, Custom);
}
@@ -1818,6 +1835,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FMA, VT, Legal);
@@ -2289,6 +2308,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FMAXIMUM, MVT::f16, Custom);
setOperationAction(ISD::FMINIMUM, MVT::f16, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, MVT::f16, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, MVT::f16, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
@@ -2333,6 +2354,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal);
+
+ setOperationAction(ISD::FMINIMUM, MVT::v32f16, Custom);
+ setOperationAction(ISD::FMAXIMUM, MVT::v32f16, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, MVT::v32f16, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, MVT::v32f16, Custom);
}
if (Subtarget.hasVLX()) {
@@ -2377,6 +2403,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Need to custom widen these to prevent scalarization.
setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
setOperationAction(ISD::STORE, MVT::v4f16, Custom);
+
+ setOperationAction(ISD::FMINIMUM, MVT::v8f16, Custom);
+ setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, MVT::v8f16, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, MVT::v8f16, Custom);
+
+ setOperationAction(ISD::FMINIMUM, MVT::v16f16, Custom);
+ setOperationAction(ISD::FMAXIMUM, MVT::v16f16, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, MVT::v16f16, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, MVT::v16f16, Custom);
}
}
@@ -2433,6 +2469,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSQRT, VT, Legal);
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::FMINIMUM, VT, Custom);
+ setOperationAction(ISD::FMAXIMUM, VT, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
}
if (Subtarget.hasAVX10_2_512()) {
setOperationAction(ISD::FADD, MVT::v32bf16, Legal);
@@ -2442,6 +2482,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v32bf16, Legal);
setOperationAction(ISD::FMA, MVT::v32bf16, Legal);
setOperationAction(ISD::SETCC, MVT::v32bf16, Custom);
+ setOperationAction(ISD::FMINIMUM, MVT::v32bf16, Custom);
+ setOperationAction(ISD::FMAXIMUM, MVT::v32bf16, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, MVT::v32bf16, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, MVT::v32bf16, Custom);
}
for (auto VT : {MVT::f16, MVT::f32, MVT::f64}) {
setCondCodeAction(ISD::SETOEQ, VT, Custom);
@@ -2643,6 +2687,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::UINT_TO_FP,
ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP,
+ ISD::FP_TO_SINT_SAT,
+ ISD::FP_TO_UINT_SAT,
ISD::SETCC,
ISD::MUL,
ISD::XOR,
@@ -28826,19 +28872,35 @@ static SDValue LowerMINMAX(SDValue Op, const X86Subtarget &Subtarget,
static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- assert((Op.getOpcode() == ISD::FMAXIMUM || Op.getOpcode() == ISD::FMINIMUM) &&
- "Expected FMAXIMUM or FMINIMUM opcode");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = Op.getValueType();
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
SDLoc DL(Op);
+ bool IsMaxOp =
+ Op.getOpcode() == ISD::FMAXIMUM || Op.getOpcode() == ISD::FMAXIMUMNUM;
+ bool IsNum =
+ Op.getOpcode() == ISD::FMINIMUMNUM || Op.getOpcode() == ISD::FMAXIMUMNUM;
+ if (Subtarget.hasAVX10_2() && TLI.isTypeLegal(VT)) {
+ unsigned Opc = 0;
+ if (VT.isVector())
+ Opc = X86ISD::VMINMAX;
+ else if (VT == MVT::f16 || VT == MVT::f32 || VT == MVT::f64)
+ Opc = X86ISD::VMINMAXS;
+
+ if (Opc) {
+ SDValue Imm =
+ DAG.getTargetConstant(IsMaxOp + (IsNum ? 16 : 0), DL, MVT::i32);
+ return DAG.getNode(Opc, DL, VT, X, Y, Imm, Op->getFlags());
+ }
+ }
+
uint64_t SizeInBits = VT.getScalarSizeInBits();
APInt PreferredZero = APInt::getZero(SizeInBits);
APInt OppositeZero = PreferredZero;
EVT IVT = VT.changeTypeToInteger();
X86ISD::NodeType MinMaxOp;
- if (Op.getOpcode() == ISD::FMAXIMUM) {
+ if (IsMaxOp) {
MinMaxOp = X86ISD::FMAX;
OppositeZero.setSignBit();
} else {
@@ -28968,7 +29030,9 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
if (IgnoreNaN || DAG.isKnownNeverNaN(NewX))
return MinMax;
- SDValue IsNaN = DAG.getSetCC(DL, SetCCType, NewX, NewX, ISD::SETUO);
+ SDValue IsNaN =
+ DAG.getSetCC(DL, SetCCType, NewX, NewX, IsNum ? ISD::SETO : ISD::SETUO);
+
return DAG.getSelect(DL, VT, IsNaN, NewX, MinMax);
}
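// The AVX10.2 VMINMAX immediate is assembled from two bits here: bit 0
// selects max vs. min, and bit 4 (value 16) selects the *NUM variants,
// which return the numeric operand when exactly one input is NaN. A worked
// table of the four values produced above (helper name is illustrative):
//
//   FMINIMUM    -> 0 + 0  = 0x00
//   FMAXIMUM    -> 1 + 0  = 0x01
//   FMINIMUMNUM -> 0 + 16 = 0x10
//   FMAXIMUMNUM -> 1 + 16 = 0x11
static unsigned minMaxImm(bool IsMaxOp, bool IsNum) {
  return (IsMaxOp ? 1u : 0u) | (IsNum ? 16u : 0u);
}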
@@ -33226,6 +33290,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UMIN: return LowerMINMAX(Op, Subtarget, DAG);
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
+ case ISD::FMINIMUMNUM:
+ case ISD::FMAXIMUMNUM:
return LowerFMINIMUM_FMAXIMUM(Op, Subtarget, DAG);
case ISD::ABS: return LowerABS(Op, Subtarget, DAG);
case ISD::ABDS:
@@ -33638,6 +33704,26 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
return;
}
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT: {
+ if (!Subtarget.hasAVX10_2())
+ return;
+
+ bool IsSigned = Opc == ISD::FP_TO_SINT_SAT;
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op.getValueType();
+ SDValue Res;
+
+ if (VT == MVT::v2i32 && OpVT == MVT::v2f64) {
+ if (IsSigned)
+ Res = DAG.getNode(X86ISD::FP_TO_SINT_SAT, dl, MVT::v4i32, Op);
+ else
+ Res = DAG.getNode(X86ISD::FP_TO_UINT_SAT, dl, MVT::v4i32, Op);
+ Results.push_back(Res);
+ }
+ return;
+ }
case ISD::FP_TO_SINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
@@ -34618,6 +34704,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VPERMV3)
NODE_NAME_CASE(VPERMI)
NODE_NAME_CASE(VPTERNLOG)
+ NODE_NAME_CASE(FP_TO_SINT_SAT)
+ NODE_NAME_CASE(FP_TO_UINT_SAT)
NODE_NAME_CASE(VFIXUPIMM)
NODE_NAME_CASE(VFIXUPIMM_SAE)
NODE_NAME_CASE(VFIXUPIMMS)
@@ -41606,6 +41694,8 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
MVT VT = N.getSimpleValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+
SmallVector<int, 4> Mask;
unsigned Opcode = N.getOpcode();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -41891,7 +41981,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
APInt Mask = APInt::getHighBitsSet(64, 32);
if (DAG.MaskedValueIsZero(In, Mask)) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, In);
- MVT VecVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2);
+ MVT VecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Trunc);
SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, DL, VecVT, SclVec);
return DAG.getBitcast(VT, Movl);
@@ -41906,7 +41996,6 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
// Create a vector constant - scalar constant followed by zeros.
EVT ScalarVT = N0.getOperand(0).getValueType();
Type *ScalarTy = ScalarVT.getTypeForEVT(*DAG.getContext());
- unsigned NumElts = VT.getVectorNumElements();
Constant *Zero = ConstantInt::getNullValue(ScalarTy);
SmallVector<Constant *, 32> ConstantVec(NumElts, Zero);
ConstantVec[0] = const_cast<ConstantInt *>(C->getConstantIntValue());
@@ -41957,9 +42046,8 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
MVT SrcVT = N0.getOperand(0).getSimpleValueType();
unsigned SrcBits = SrcVT.getScalarSizeInBits();
if ((EltBits % SrcBits) == 0 && SrcBits >= 32) {
- unsigned Size = VT.getVectorNumElements();
unsigned NewSize = SrcVT.getVectorNumElements();
- APInt BlendMask = N.getConstantOperandAPInt(2).zextOrTrunc(Size);
+ APInt BlendMask = N.getConstantOperandAPInt(2).zextOrTrunc(NumElts);
APInt NewBlendMask = APIntOps::ScaleBitMask(BlendMask, NewSize);
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0),
@@ -42372,7 +42460,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
DMask[DOffset + 0] = DOffset + 1;
DMask[DOffset + 1] = DOffset + 0;
- MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
+ MVT DVT = MVT::getVectorVT(MVT::i32, NumElts / 2);
V = DAG.getBitcast(DVT, V);
V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V,
getV4X86ShuffleImm8ForMask(DMask, DL, DAG));
@@ -45967,6 +46055,8 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
case ISD::FMAXNUM_IEEE:
case ISD::FMAXIMUM:
case ISD::FMINIMUM:
+ case ISD::FMAXIMUMNUM:
+ case ISD::FMINIMUMNUM:
case X86ISD::FMAX:
case X86ISD::FMIN:
case ISD::FABS: // Begin 1 operand
@@ -56175,6 +56265,33 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Custom handling for VCVTTPS2QQS/VCVTTPS2UQQS
+static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ if (!Subtarget.hasAVX10_2())
+ return SDValue();
+
+ bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
+ EVT SrcVT = N->getOperand(0).getValueType();
+ EVT DstVT = N->getValueType(0);
+ SDLoc dl(N);
+
+ if (SrcVT == MVT::v2f32 && DstVT == MVT::v2i64) {
+ SDValue V2F32Value = DAG.getUNDEF(SrcVT);
+
+ // Concatenate the original v2f32 input and V2F32Value to create v4f32
+ SDValue NewSrc = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
+ N->getOperand(0), V2F32Value);
+
+ // Emit the X86ISD::FP_TO_SINT_SAT or X86ISD::FP_TO_UINT_SAT node.
+ if (IsSigned)
+ return DAG.getNode(X86ISD::FP_TO_SINT_SAT, dl, MVT::v2i64, NewSrc);
+
+ return DAG.getNode(X86ISD::FP_TO_UINT_SAT, dl, MVT::v2i64, NewSrc);
+ }
+ return SDValue();
+}
+
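// Worked lane view of the widening in combineFP_TO_xINT_SAT, assuming an
// input <a, b> : v2f32 (lanes written left to right):
//
//   NewSrc = concat_vectors(<a, b>, undef)   // <a, b, u, u> : v4f32
//   Res    = FP_TO_SINT_SAT(NewSrc)          // <sat(a), sat(b)> : v2i64
//
// The 128-bit VCVTTPS2QQS/VCVTTPS2UQQS forms read a full f32 vector but
// convert only the low two lanes to i64, so the undef upper half never
// influences the result.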
static bool needCarryOrOverflowFlag(SDValue Flags) {
assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
@@ -59288,6 +59405,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::INTRINSIC_WO_CHAIN: return combineINTRINSIC_WO_CHAIN(N, DAG, DCI);
case ISD::INTRINSIC_W_CHAIN: return combineINTRINSIC_W_CHAIN(N, DAG, DCI);
case ISD::INTRINSIC_VOID: return combineINTRINSIC_VOID(N, DAG, DCI);
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT: return combineFP_TO_xINT_SAT(N, DAG, Subtarget);
// clang-format on
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 2b7a8ea..eaedaa0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -908,6 +908,10 @@ namespace llvm {
// Load x87 FPU environment from memory.
FLDENVm,
+ // Custom handling for FP_TO_xINT_SAT
+ FP_TO_SINT_SAT,
+ FP_TO_UINT_SAT,
+
/// This instruction implements FP_TO_SINT with the
/// integer destination in memory and a FP reg source. This corresponds
/// to the X86::FIST*m instructions and the rounding mode change stuff. It
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 0301c07..1270161 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -403,28 +403,45 @@ multiclass avx10_minmax_scalar<string OpStr, X86VectorVTInfo _, SDNode OpNode,
SDNode OpNodeSAE> {
let ExeDomain = _.ExeDomain, Predicates = [HasAVX10_2] in {
let mayRaiseFPException = 1 in {
+ let isCodeGenOnly = 1 in {
+ def rri : AVX512Ii8<0x53, MRMSrcReg, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
+ !strconcat(OpStr, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2, (i32 timm:$src3)))]>,
+ Sched<[WriteFMAX]>;
+
+ def rmi : AVX512Ii8<0x53, MRMSrcMem, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
+ !strconcat(OpStr, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2),
+ (i32 timm:$src3)))]>,
+ Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
+ }
defm rri : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
- OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
- (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 timm:$src3)))>,
- Sched<[WriteFMAX]>;
+ (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
+ OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 timm:$src3))),
+ 0, 0, 0, vselect_mask, "", "_Int">,
+ Sched<[WriteFMAX]>;
defm rmi : AVX512_maskable<0x53, MRMSrcMem, _, (outs VR128X:$dst),
- (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
- OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
- (_.VT (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
- (i32 timm:$src3)))>,
+ (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
+ OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (_.VT (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
+ (i32 timm:$src3))),
+ 0, 0, 0, vselect_mask, "", "_Int">,
Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
}
let Uses = []<Register>, mayRaiseFPException = 0 in
defm rrib : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
- OpStr, "$src3, {sae}, $src2, $src1",
- "$src1, $src2, {sae}, $src3",
- (_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 timm:$src3)))>,
- Sched<[WriteFMAX]>, EVEX_B;
+ (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
+ OpStr, "$src3, {sae}, $src2, $src1",
+ "$src1, $src2, {sae}, $src3",
+ (_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 timm:$src3))),
+ 0, 0, 0, vselect_mask, "", "_Int">,
+ Sched<[WriteFMAX]>, EVEX_B;
}
}
@@ -817,6 +834,70 @@ let Predicates = [HasAVX10_2] in {
// patterns have been disabled with null_frag.
// Patterns VCVTTPD2DQSZ128
+// VCVTTPD2DQS
+def : Pat<(v4i32(X86fp2sisat(v2f64 VR128X:$src))),
+ (VCVTTPD2DQSZ128rr VR128X:$src)>;
+def : Pat<(v4i32(fp_to_sint_sat(v4f64 VR256X:$src), i32)),
+ (VCVTTPD2DQSZ256rr VR256X:$src)>;
+def : Pat<(v8i32(fp_to_sint_sat(v8f64 VR512:$src), i32)),
+ (VCVTTPD2DQSZrr VR512:$src)>;
+
+// VCVTTPD2QQS
+def : Pat<(v2i64(fp_to_sint_sat(v2f64 VR128X:$src), i64)),
+ (VCVTTPD2QQSZ128rr VR128X:$src)>;
+def : Pat<(v4i64(fp_to_sint_sat(v4f64 VR256X:$src), i64)),
+ (VCVTTPD2QQSZ256rr VR256X:$src)>;
+def : Pat<(v8i64(fp_to_sint_sat(v8f64 VR512:$src), i64)),
+ (VCVTTPD2QQSZrr VR512:$src)>;
+
+// VCVTTPD2UDQS
+def : Pat<(v4i32(X86fp2uisat(v2f64 VR128X:$src))),
+ (VCVTTPD2UDQSZ128rr VR128X:$src)>;
+def : Pat<(v4i32(fp_to_uint_sat(v4f64 VR256X:$src), i32)),
+ (VCVTTPD2UDQSZ256rr VR256X:$src)>;
+def : Pat<(v8i32(fp_to_uint_sat(v8f64 VR512:$src), i32)),
+ (VCVTTPD2UDQSZrr VR512:$src)>;
+
+// VCVTTPD2UQQS
+def : Pat<(v2i64(fp_to_uint_sat(v2f64 VR128X:$src), i64)),
+ (VCVTTPD2UQQSZ128rr VR128X:$src)>;
+def : Pat<(v4i64(fp_to_uint_sat(v4f64 VR256X:$src), i64)),
+ (VCVTTPD2UQQSZ256rr VR256X:$src)>;
+def : Pat<(v8i64(fp_to_uint_sat(v8f64 VR512:$src), i64)),
+ (VCVTTPD2UQQSZrr VR512:$src)>;
+
+// VCVTTPS2DQS
+def : Pat<(v4i32(fp_to_sint_sat(v4f32 VR128X:$src), i32)),
+ (VCVTTPS2DQSZ128rr VR128X:$src)>;
+def : Pat<(v8i32(fp_to_sint_sat(v8f32 VR256X:$src), i32)),
+ (VCVTTPS2DQSZ256rr VR256X:$src)>;
+def : Pat<(v16i32(fp_to_sint_sat(v16f32 VR512:$src), i32)),
+ (VCVTTPS2DQSZrr VR512:$src)>;
+
+// VCVTTPS2QQS
+def : Pat<(v2i64(X86fp2sisat(v4f32 VR128X:$src))),
+ (VCVTTPS2QQSZ128rr VR128X:$src)>;
+def : Pat<(v4i64(fp_to_sint_sat(v4f32 VR128X:$src), i64)),
+ (VCVTTPS2QQSZ256rr VR128X:$src)>;
+def : Pat<(v8i64(fp_to_sint_sat(v8f32 VR256X:$src), i64)),
+ (VCVTTPS2QQSZrr VR256X:$src)>;
+
+// VCVTTPS2UDQS
+def : Pat<(v4i32(fp_to_uint_sat(v4f32 VR128X:$src), i32)),
+ (VCVTTPS2UDQSZ128rr VR128X:$src)>;
+def : Pat<(v8i32(fp_to_uint_sat(v8f32 VR256X:$src), i32)),
+ (VCVTTPS2UDQSZ256rr VR256X:$src)>;
+def : Pat<(v16i32(fp_to_uint_sat(v16f32 VR512:$src), i32)),
+ (VCVTTPS2UDQSZrr VR512:$src)>;
+
+// VCVTTPS2UQQS
+def : Pat<(v2i64(X86fp2uisat(v4f32 VR128X:$src))),
+ (VCVTTPS2UQQSZ128rr VR128X:$src)>;
+def : Pat<(v4i64(fp_to_uint_sat(v4f32 VR128X:$src), i64)),
+ (VCVTTPS2UQQSZ256rr VR128X:$src)>;
+def : Pat<(v8i64(fp_to_uint_sat(v8f32 VR256X:$src), i64)),
+ (VCVTTPS2UQQSZrr VR256X:$src)>;
+
def : Pat<(v4i32 (X86cvttp2sis (v2f64 VR128X:$src))),
(VCVTTPD2DQSZ128rr VR128X:$src)>;
def : Pat<(v4i32 (X86cvttp2sis (loadv2f64 addr:$src))),
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index e899807..d6ca4b1 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -28,19 +28,20 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
bit IsCommutable = 0,
bit IsKCommutable = 0,
bit IsKZCommutable = IsCommutable,
- string ClobberConstraint = ""> {
+ string ClobberConstraint = "",
+ string Suffix = ""> {
let isCommutable = IsCommutable, Constraints = ClobberConstraint in
- def NAME: AVX512<O, F, Outs, Ins,
- OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
- "$dst, "#IntelSrcAsm#"}",
- Pattern>;
+ def Suffix: AVX512<O, F, Outs, Ins,
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
+ "$dst, "#IntelSrcAsm#"}",
+ Pattern>;
// Prefer over VMOV*rrk Pat<>
let isCommutable = IsKCommutable in
- def NAME#k: AVX512<O, F, Outs, MaskingIns,
- OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
- "$dst {${mask}}, "#IntelSrcAsm#"}",
- MaskingPattern>,
+ def k#Suffix: AVX512<O, F, Outs, MaskingIns,
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
+ "$dst {${mask}}, "#IntelSrcAsm#"}",
+ MaskingPattern>,
EVEX_K {
// In case of the 3src subclass this is overridden with a let.
string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
@@ -52,10 +53,10 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
// So, it is Ok to use IsCommutable instead of IsKCommutable.
let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
Constraints = ClobberConstraint in
- def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
- OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
- "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
- ZeroMaskingPattern>,
+ def kz#Suffix: AVX512<O, F, Outs, ZeroMaskingIns,
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
+ "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
+ ZeroMaskingPattern>,
EVEX_KZ;
}
@@ -72,7 +73,8 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
bit IsCommutable = 0,
bit IsKCommutable = 0,
bit IsKZCommutable = IsCommutable,
- string ClobberConstraint = ""> :
+ string ClobberConstraint = "",
+ string Suffix = ""> :
AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[(set _.RC:$dst, RHS)],
@@ -80,7 +82,8 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
[(set _.RC:$dst,
(Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
MaskingConstraint, IsCommutable,
- IsKCommutable, IsKZCommutable, ClobberConstraint>;
+ IsKCommutable, IsKZCommutable, ClobberConstraint,
+ Suffix>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
@@ -115,23 +118,24 @@ multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
bit IsCommutable = 0, bit IsKCommutable = 0,
bit IsKZCommutable = IsCommutable,
SDPatternOperator Select = vselect_mask,
- string ClobberConstraint = ""> :
+ string ClobberConstraint = "",
+ string Suffix = ""> :
AVX512_maskable_common<O, F, _, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(Select _.KRCWM:$mask, RHS, _.RC:$src0),
Select, "$src0 = $dst", IsCommutable, IsKCommutable,
- IsKZCommutable, ClobberConstraint>;
+ IsKZCommutable, ClobberConstraint, Suffix>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
- dag RHS> :
+ dag RHS, string Suffix = ""> :
AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
- RHS, 0, 0, 0, X86selects_mask>;
+ RHS, 0, 0, 0, X86selects_mask, "", Suffix>;
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
@@ -144,7 +148,7 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
bit IsCommutable = 0,
bit IsKCommutable = 0,
SDPatternOperator Select = vselect_mask,
- bit MaskOnly = 0> :
+ bit MaskOnly = 0, string Suffix = ""> :
AVX512_maskable_common<O, F, _, Outs,
!con((ins _.RC:$src1), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
@@ -152,7 +156,8 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
OpcodeStr, AttSrcAsm, IntelSrcAsm,
!if(MaskOnly, (null_frag), RHS),
(Select _.KRCWM:$mask, RHS, _.RC:$src1),
- Select, "", IsCommutable, IsKCommutable>;
+ Select, "", IsCommutable, IsKCommutable,
+ IsCommutable, "", Suffix>;
// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
@@ -178,10 +183,10 @@ multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag RHS,
bit IsCommutable = 0,
bit IsKCommutable = 0,
- bit MaskOnly = 0> :
+ bit MaskOnly = 0, string Suffix = ""> :
AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
- X86selects_mask, MaskOnly>;
+ X86selects_mask, MaskOnly, Suffix>;
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins,
@@ -215,17 +220,18 @@ multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
string AttSrcAsm, string IntelSrcAsm,
list<dag> Pattern,
list<dag> MaskingPattern,
- bit IsCommutable = 0> {
+ bit IsCommutable = 0,
+ string Suffix = ""> {
let isCommutable = IsCommutable in {
- def NAME: AVX512<O, F, Outs, Ins,
+ def Suffix: AVX512<O, F, Outs, Ins,
OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
"$dst, "#IntelSrcAsm#"}",
Pattern>;
- def NAME#k: AVX512<O, F, Outs, MaskingIns,
- OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
- "$dst {${mask}}, "#IntelSrcAsm#"}",
- MaskingPattern>, EVEX_K;
+ def k#Suffix: AVX512<O, F, Outs, MaskingIns,
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
+ "$dst {${mask}}, "#IntelSrcAsm#"}",
+ MaskingPattern>, EVEX_K;
}
}
@@ -235,20 +241,22 @@ multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskingRHS,
- bit IsCommutable = 0> :
+ bit IsCommutable = 0,
+ string Suffix = ""> :
AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[(set _.KRC:$dst, RHS)],
- [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
+ [(set _.KRC:$dst, MaskingRHS)], IsCommutable, Suffix>;
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
- dag RHS, dag RHS_su, bit IsCommutable = 0> :
+ dag RHS, dag RHS_su, bit IsCommutable = 0,
+ string Suffix = ""> :
AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
- (and _.KRCWM:$mask, RHS_su), IsCommutable>;
+ (and _.KRCWM:$mask, RHS_su), IsCommutable, Suffix>;
// Used by conversion instructions.
multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
@@ -1937,37 +1945,37 @@ defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
PatFrag OpNode_su, PatFrag OpNodeSAE_su,
X86FoldableSchedWrite sched> {
- defm rri_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
- (outs _.KRC:$dst),
- (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
- "vcmp"#_.Suffix,
- "$cc, $src2, $src1", "$src1, $src2, $cc",
- (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
- (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc)>,
- EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
+ defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc",
+ (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
+ (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 0, "_Int">,
+ EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
let mayLoad = 1 in
- defm rmi_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
- (outs _.KRC:$dst),
- (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
- "vcmp"#_.Suffix,
- "$cc, $src2, $src1", "$src1, $src2, $cc",
- (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
- timm:$cc),
- (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
- timm:$cc)>, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
+ defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc",
+ (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
+ timm:$cc),
+ (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
+ timm:$cc), 0, "_Int">, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
let Uses = [MXCSR] in
- defm rrib_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
- (outs _.KRC:$dst),
- (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
- "vcmp"#_.Suffix,
- "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
- (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- timm:$cc),
- (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- timm:$cc)>,
- EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
+ defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
+ (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ timm:$cc),
+ (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ timm:$cc), 0, "_Int">,
+ EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
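The compare flavour generates only the unmasked and merge-masked defs, but the names compose the same way; a sketch:

// Sketch: inside `defm VCMPSSZ`, `defm rri : AVX512_maskable_cmp<..., "_Int">`
// now yields VCMPSSZrri_Int and VCMPSSZrrik_Int
// (previously VCMPSSZrri_Int and VCMPSSZrri_Intk).
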
let isCodeGenOnly = 1 in {
let isCommutable = 1 in
@@ -5354,17 +5362,17 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDPatternOperator OpNode, SDNode VecNode,
X86FoldableSchedWrite sched, bit IsCommutable> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
+ (_.VT (VecNode _.RC:$src1, _.RC:$src2)), "_Int">,
Sched<[sched]>;
- defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
- (_.ScalarIntMemFrags addr:$src2)))>,
+ (_.ScalarIntMemFrags addr:$src2))), "_Int">,
Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -5387,28 +5395,28 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode VecNode, X86FoldableSchedWrite sched> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
- defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 timm:$rc))>,
+ (i32 timm:$rc)), "_Int">,
EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDPatternOperator OpNode, SDNode VecNode, SDNode SaeNode,
X86FoldableSchedWrite sched, bit IsCommutable> {
let ExeDomain = _.ExeDomain in {
- defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
+ (_.VT (VecNode _.RC:$src1, _.RC:$src2)), "_Int">,
Sched<[sched]>, SIMD_EXC;
- defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
- (_.ScalarIntMemFrags addr:$src2)))>,
+ (_.ScalarIntMemFrags addr:$src2))), "_Int">,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
let isCodeGenOnly = 1, Predicates = [HasAVX512],
@@ -5429,10 +5437,10 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
}
let Uses = [MXCSR] in
- defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
- (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
+ (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), "_Int">,
EVEX_B, Sched<[sched]>;
}
}
@@ -6835,22 +6843,22 @@ defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
- defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm r: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3), OpcodeStr,
- "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
+ "$src3, $src2", "$src2, $src3", (null_frag), 1, 1, 0, "_Int">,
EVEX, VVVV, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
let mayLoad = 1 in
- defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm m: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
- "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
+ "$src3, $src2", "$src2, $src3", (null_frag), 1, 1, 0, "_Int">,
EVEX, VVVV, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
let Uses = [MXCSR] in
- defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rb: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
- OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
+ OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1, 0, "_Int">,
EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
let isCodeGenOnly = 1, isCommutable = 1 in {
@@ -6982,7 +6990,7 @@ multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src3),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
- (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
+ (!cast<I>(Prefix#"213"#Suffix#"Zrk_Int")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
@@ -6993,7 +7001,7 @@ multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(_.ScalarLdFrag addr:$src3)),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
- (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
+ (!cast<I>(Prefix#"213"#Suffix#"Zmk_Int")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
@@ -7002,7 +7010,7 @@ multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
(MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(_.ScalarLdFrag addr:$src3), _.FRC:$src2),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
- (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
+ (!cast<I>(Prefix#"132"#Suffix#"Zmk_Int")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
@@ -7011,7 +7019,7 @@ multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
(MaskedOp _.FRC:$src2, _.FRC:$src3,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
- (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
+ (!cast<I>(Prefix#"231"#Suffix#"Zrk_Int")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
@@ -7021,7 +7029,7 @@ multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
(MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
- (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
+ (!cast<I>(Prefix#"231"#Suffix#"Zmk_Int")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
@@ -7031,7 +7039,7 @@ multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src3),
(_.EltVT ZeroFP)))))),
- (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
+ (!cast<I>(Prefix#"213"#Suffix#"Zrkz_Int")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
@@ -7041,7 +7049,7 @@ multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
(MaskedOp _.FRC:$src2, _.FRC:$src3,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
(_.EltVT ZeroFP)))))),
- (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
+ (!cast<I>(Prefix#"231"#Suffix#"Zrkz_Int")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
@@ -7052,7 +7060,7 @@ multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(_.ScalarLdFrag addr:$src3)),
(_.EltVT ZeroFP)))))),
- (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
+ (!cast<I>(Prefix#"213"#Suffix#"Zmkz_Int")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
@@ -7061,7 +7069,7 @@ multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
(MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
(_.EltVT ZeroFP)))))),
- (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
+ (!cast<I>(Prefix#"132"#Suffix#"Zmkz_Int")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
@@ -7070,7 +7078,7 @@ multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
(MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
(_.EltVT ZeroFP)))))),
- (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
+ (!cast<I>(Prefix#"231"#Suffix#"Zmkz_Int")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
@@ -7097,7 +7105,7 @@ multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src3, (i32 timm:$rc)),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
- (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
+ (!cast<I>(Prefix#"213"#Suffix#"Zrbk_Int")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
@@ -7108,7 +7116,7 @@ multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(i32 timm:$rc)),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
- (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
+ (!cast<I>(Prefix#"231"#Suffix#"Zrbk_Int")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
@@ -7119,7 +7127,7 @@ multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src3, (i32 timm:$rc)),
(_.EltVT ZeroFP)))))),
- (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
+ (!cast<I>(Prefix#"213"#Suffix#"Zrbkz_Int")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
@@ -7130,7 +7138,7 @@ multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(i32 timm:$rc)),
(_.EltVT ZeroFP)))))),
- (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
+ (!cast<I>(Prefix#"231"#Suffix#"Zrbkz_Int")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
@@ -7628,17 +7636,17 @@ let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNode,
X86FoldableSchedWrite sched> {
- defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
- (_Src.VT _Src.RC:$src2)))>,
+ (_Src.VT _Src.RC:$src2))), "_Int">,
EVEX, VVVV, VEX_LIG, Sched<[sched]>;
- defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
- (_Src.ScalarIntMemFrags addr:$src2)))>,
+ (_Src.ScalarIntMemFrags addr:$src2))), "_Int">,
EVEX, VVVV, VEX_LIG,
Sched<[sched.Folded, sched.ReadAfterFold]>;
@@ -7660,11 +7668,11 @@ multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTIn
X86VectorVTInfo _Src, SDNode OpNodeSAE,
X86FoldableSchedWrite sched> {
let Uses = [MXCSR] in
- defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(_.VT (OpNodeSAE (_.VT _.RC:$src1),
- (_Src.VT _Src.RC:$src2)))>,
+ (_Src.VT _Src.RC:$src2))), "_Int">,
EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
}
@@ -7673,11 +7681,11 @@ multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInf
X86VectorVTInfo _Src, SDNode OpNodeRnd,
X86FoldableSchedWrite sched> {
let Uses = [MXCSR] in
- defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
- (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
+ (_Src.VT _Src.RC:$src2), (i32 timm:$rc))), "_Int">,
EVEX, VVVV, VEX_LIG, Sched<[sched]>,
EVEX_B, EVEX_RC;
}
@@ -9531,25 +9539,25 @@ multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
let ExeDomain = _.ExeDomain, Predicates = [prd] in {
- defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(X86fsqrts (_.VT _.RC:$src1),
- (_.VT _.RC:$src2))>,
+ (_.VT _.RC:$src2)), "_Int">,
Sched<[sched]>, SIMD_EXC;
- defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(X86fsqrts (_.VT _.RC:$src1),
- (_.ScalarIntMemFrags addr:$src2))>,
+ (_.ScalarIntMemFrags addr:$src2)), "_Int">,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
let Uses = [MXCSR] in
- defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(X86fsqrtRnds (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
- (i32 timm:$rc))>,
+ (i32 timm:$rc)), "_Int">,
EVEX_B, EVEX_RC, Sched<[sched]>;
let isCodeGenOnly = 1, hasSideEffects = 0 in {
@@ -9596,27 +9604,27 @@ defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LI
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
- defm rri_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 timm:$src3)))>,
+ (i32 timm:$src3))), "_Int">,
Sched<[sched]>, SIMD_EXC;
let Uses = [MXCSR] in
- defm rrib_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
(_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 timm:$src3)))>, EVEX_B,
+ (i32 timm:$src3))), "_Int">, EVEX_B,
Sched<[sched]>;
- defm rmi_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales _.RC:$src1,
- (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
+ (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3))), "_Int">,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
@@ -9669,13 +9677,13 @@ multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
(OpNode (extractelt _.VT:$src2, (iPTR 0))),
(extractelt _.VT:$dst, (iPTR 0))))),
- (!cast<Instruction>("V"#OpcPrefix#r_Intk)
+ (!cast<Instruction>("V"#OpcPrefix#rk_Int)
_.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
(OpNode (extractelt _.VT:$src2, (iPTR 0))),
ZeroFP))),
- (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
+ (!cast<Instruction>("V"#OpcPrefix#rkz_Int)
OutMask, _.VT:$src2, _.VT:$src1)>;
}
}
@@ -12174,7 +12182,7 @@ multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
(extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src2),
_.FRC:$src0))),
- (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
+ (!cast<Instruction>("V"#OpcPrefix#"Zrrk_Int")
(_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
VK1WM:$mask, _.VT:$src1,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
@@ -12185,7 +12193,7 @@ multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
(extractelt (_.VT VR128X:$src1), (iPTR 0))),
(_.ScalarLdFrag addr:$src2)),
_.FRC:$src0))),
- (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
+ (!cast<Instruction>("V"#OpcPrefix#"Zrmk_Int")
(_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
VK1WM:$mask, _.VT:$src1, addr:$src2)>;
@@ -12196,7 +12204,7 @@ multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
(MaskedOp (_.EltVT
(extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src2), (_.EltVT ZeroFP)))),
- (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
+ (!cast<I>("V"#OpcPrefix#"Zrrkz_Int")
VK1WM:$mask, _.VT:$src1,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
def : Pat<(MoveNode (_.VT VR128X:$src1),
@@ -12205,7 +12213,7 @@ multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
(MaskedOp (_.EltVT
(extractelt (_.VT VR128X:$src1), (iPTR 0))),
(_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
- (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
+ (!cast<I>("V"#OpcPrefix#"Zrmkz_Int") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
}
}
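The !cast<> strings on the pattern side track the same reordering. As a sketch of how one of the FMA casts above resolves (both names appear elsewhere in this diff):

// Sketch: with Prefix = "VFMADD" and Suffix = "SS", the cast
//   !cast<I>(Prefix#"213"#Suffix#"Zrk_Int")
// resolves to the merge-masked intrinsic form VFMADD213SSZrk_Int,
// which under the old naming was VFMADD213SSZr_Intk.
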
diff --git a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
index 090ec68..0da4857 100644
--- a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
+++ b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
@@ -27,6 +27,11 @@ using namespace llvm;
FMA3GROUP(Name, Suf##k, Attrs | X86InstrFMA3Group::KMergeMasked) \
FMA3GROUP(Name, Suf##kz, Attrs | X86InstrFMA3Group::KZeroMasked)
+#define FMA3GROUP_MASKED_INT(Name, Suf, Attrs) \
+ FMA3GROUP(Name, Suf##_Int, Attrs) \
+ FMA3GROUP(Name, Suf##k_Int, Attrs | X86InstrFMA3Group::KMergeMasked) \
+ FMA3GROUP(Name, Suf##kz_Int, Attrs | X86InstrFMA3Group::KZeroMasked)
+
#define FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs) \
FMA3GROUP_MASKED(Name, Suf##Z128m, Attrs) \
FMA3GROUP_MASKED(Name, Suf##Z128r, Attrs) \
@@ -52,9 +57,9 @@ using namespace llvm;
#define FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \
FMA3GROUP(Name, Suf##Zm, Attrs) \
- FMA3GROUP_MASKED(Name, Suf##Zm_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
+ FMA3GROUP_MASKED_INT(Name, Suf##Zm, Attrs | X86InstrFMA3Group::Intrinsic) \
FMA3GROUP(Name, Suf##Zr, Attrs) \
- FMA3GROUP_MASKED(Name, Suf##Zr_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
+ FMA3GROUP_MASKED_INT(Name, Suf##Zr, Attrs | X86InstrFMA3Group::Intrinsic) \
#define FMA3GROUP_SCALAR_WIDTHS_ALL(Name, Suf, Attrs) \
FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \
@@ -108,11 +113,11 @@ static const X86InstrFMA3Group Groups[] = {
#define FMA3GROUP_SCALAR_AVX512_ROUND(Name, Suf, Attrs) \
FMA3GROUP(Name, SDZ##Suf, Attrs) \
- FMA3GROUP_MASKED(Name, SDZ##Suf##_Int, Attrs) \
+ FMA3GROUP_MASKED_INT(Name, SDZ##Suf, Attrs) \
FMA3GROUP(Name, SHZ##Suf, Attrs) \
- FMA3GROUP_MASKED(Name, SHZ##Suf##_Int, Attrs) \
+ FMA3GROUP_MASKED_INT(Name, SHZ##Suf, Attrs) \
FMA3GROUP(Name, SSZ##Suf, Attrs) \
- FMA3GROUP_MASKED(Name, SSZ##Suf##_Int, Attrs)
+ FMA3GROUP_MASKED_INT(Name, SSZ##Suf, Attrs)
static const X86InstrFMA3Group BroadcastGroups[] = {
FMA3GROUP_PACKED_AVX512_ALL(VFMADD, mb, 0)
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index f6231b7..af0267a 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -390,6 +390,13 @@ def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>,
SDTCisFP<0>, SDTCisVT<4, i32>]>;
+def SDTFPToxIntSatOp
+ : SDTypeProfile<1,
+ 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCisFP<1>]>;
+
+def X86fp2sisat : SDNode<"X86ISD::FP_TO_SINT_SAT", SDTFPToxIntSatOp>;
+def X86fp2uisat : SDNode<"X86ISD::FP_TO_UINT_SAT", SDTFPToxIntSatOp>;
+
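These nodes are selected by the 128-bit patterns in X86InstrAVX10.td, where the result is narrower than the legal vector width. A usage sketch mirroring the X86fp2uisat pattern added earlier in this diff (the signed instruction name here is assumed by analogy with VCVTTPS2QQSZ256rr):

// Sketch only:
def : Pat<(v2i64 (X86fp2sisat (v4f32 VR128X:$src))),
          (VCVTTPS2QQSZ128rr VR128X:$src)>;
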
def X86PAlignr : SDNode<"X86ISD::PALIGNR",
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i8>,
SDTCisSameAs<0,1>,
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 5a6ea11..30a5161 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -7646,8 +7646,8 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::CVTSS2SDrr_Int:
case X86::VCVTSS2SDrr_Int:
case X86::VCVTSS2SDZrr_Int:
- case X86::VCVTSS2SDZrr_Intk:
- case X86::VCVTSS2SDZrr_Intkz:
+ case X86::VCVTSS2SDZrrk_Int:
+ case X86::VCVTSS2SDZrrkz_Int:
case X86::CVTSS2SIrr_Int:
case X86::CVTSS2SI64rr_Int:
case X86::VCVTSS2SIrr_Int:
@@ -7700,21 +7700,21 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::SUBSSrr_Int:
case X86::VSUBSSrr_Int:
case X86::VSUBSSZrr_Int:
- case X86::VADDSSZrr_Intk:
- case X86::VADDSSZrr_Intkz:
- case X86::VCMPSSZrri_Intk:
- case X86::VDIVSSZrr_Intk:
- case X86::VDIVSSZrr_Intkz:
- case X86::VMAXSSZrr_Intk:
- case X86::VMAXSSZrr_Intkz:
- case X86::VMINSSZrr_Intk:
- case X86::VMINSSZrr_Intkz:
- case X86::VMULSSZrr_Intk:
- case X86::VMULSSZrr_Intkz:
- case X86::VSQRTSSZr_Intk:
- case X86::VSQRTSSZr_Intkz:
- case X86::VSUBSSZrr_Intk:
- case X86::VSUBSSZrr_Intkz:
+ case X86::VADDSSZrrk_Int:
+ case X86::VADDSSZrrkz_Int:
+ case X86::VCMPSSZrrik_Int:
+ case X86::VDIVSSZrrk_Int:
+ case X86::VDIVSSZrrkz_Int:
+ case X86::VMAXSSZrrk_Int:
+ case X86::VMAXSSZrrkz_Int:
+ case X86::VMINSSZrrk_Int:
+ case X86::VMINSSZrrkz_Int:
+ case X86::VMULSSZrrk_Int:
+ case X86::VMULSSZrrkz_Int:
+ case X86::VSQRTSSZrk_Int:
+ case X86::VSQRTSSZrkz_Int:
+ case X86::VSUBSSZrrk_Int:
+ case X86::VSUBSSZrrkz_Int:
case X86::VFMADDSS4rr_Int:
case X86::VFNMADDSS4rr_Int:
case X86::VFMSUBSS4rr_Int:
@@ -7743,30 +7743,30 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VFNMSUB213SSZr_Int:
case X86::VFMSUB231SSZr_Int:
case X86::VFNMSUB231SSZr_Int:
- case X86::VFMADD132SSZr_Intk:
- case X86::VFNMADD132SSZr_Intk:
- case X86::VFMADD213SSZr_Intk:
- case X86::VFNMADD213SSZr_Intk:
- case X86::VFMADD231SSZr_Intk:
- case X86::VFNMADD231SSZr_Intk:
- case X86::VFMSUB132SSZr_Intk:
- case X86::VFNMSUB132SSZr_Intk:
- case X86::VFMSUB213SSZr_Intk:
- case X86::VFNMSUB213SSZr_Intk:
- case X86::VFMSUB231SSZr_Intk:
- case X86::VFNMSUB231SSZr_Intk:
- case X86::VFMADD132SSZr_Intkz:
- case X86::VFNMADD132SSZr_Intkz:
- case X86::VFMADD213SSZr_Intkz:
- case X86::VFNMADD213SSZr_Intkz:
- case X86::VFMADD231SSZr_Intkz:
- case X86::VFNMADD231SSZr_Intkz:
- case X86::VFMSUB132SSZr_Intkz:
- case X86::VFNMSUB132SSZr_Intkz:
- case X86::VFMSUB213SSZr_Intkz:
- case X86::VFNMSUB213SSZr_Intkz:
- case X86::VFMSUB231SSZr_Intkz:
- case X86::VFNMSUB231SSZr_Intkz:
+ case X86::VFMADD132SSZrk_Int:
+ case X86::VFNMADD132SSZrk_Int:
+ case X86::VFMADD213SSZrk_Int:
+ case X86::VFNMADD213SSZrk_Int:
+ case X86::VFMADD231SSZrk_Int:
+ case X86::VFNMADD231SSZrk_Int:
+ case X86::VFMSUB132SSZrk_Int:
+ case X86::VFNMSUB132SSZrk_Int:
+ case X86::VFMSUB213SSZrk_Int:
+ case X86::VFNMSUB213SSZrk_Int:
+ case X86::VFMSUB231SSZrk_Int:
+ case X86::VFNMSUB231SSZrk_Int:
+ case X86::VFMADD132SSZrkz_Int:
+ case X86::VFNMADD132SSZrkz_Int:
+ case X86::VFMADD213SSZrkz_Int:
+ case X86::VFNMADD213SSZrkz_Int:
+ case X86::VFMADD231SSZrkz_Int:
+ case X86::VFNMADD231SSZrkz_Int:
+ case X86::VFMSUB132SSZrkz_Int:
+ case X86::VFNMSUB132SSZrkz_Int:
+ case X86::VFMSUB213SSZrkz_Int:
+ case X86::VFNMSUB213SSZrkz_Int:
+ case X86::VFMSUB231SSZrkz_Int:
+ case X86::VFNMSUB231SSZrkz_Int:
case X86::VFIXUPIMMSSZrri:
case X86::VFIXUPIMMSSZrrik:
case X86::VFIXUPIMMSSZrrikz:
@@ -7791,8 +7791,8 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VREDUCESSZrrik:
case X86::VREDUCESSZrrikz:
case X86::VRNDSCALESSZrri_Int:
- case X86::VRNDSCALESSZrri_Intk:
- case X86::VRNDSCALESSZrri_Intkz:
+ case X86::VRNDSCALESSZrrik_Int:
+ case X86::VRNDSCALESSZrrikz_Int:
case X86::VRSQRT14SSZrr:
case X86::VRSQRT14SSZrrk:
case X86::VRSQRT14SSZrrkz:
@@ -7819,8 +7819,8 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::CVTSD2SSrr_Int:
case X86::VCVTSD2SSrr_Int:
case X86::VCVTSD2SSZrr_Int:
- case X86::VCVTSD2SSZrr_Intk:
- case X86::VCVTSD2SSZrr_Intkz:
+ case X86::VCVTSD2SSZrrk_Int:
+ case X86::VCVTSD2SSZrrkz_Int:
case X86::CVTSD2SIrr_Int:
case X86::CVTSD2SI64rr_Int:
case X86::VCVTSD2SIrr_Int:
@@ -7869,21 +7869,21 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::SUBSDrr_Int:
case X86::VSUBSDrr_Int:
case X86::VSUBSDZrr_Int:
- case X86::VADDSDZrr_Intk:
- case X86::VADDSDZrr_Intkz:
- case X86::VCMPSDZrri_Intk:
- case X86::VDIVSDZrr_Intk:
- case X86::VDIVSDZrr_Intkz:
- case X86::VMAXSDZrr_Intk:
- case X86::VMAXSDZrr_Intkz:
- case X86::VMINSDZrr_Intk:
- case X86::VMINSDZrr_Intkz:
- case X86::VMULSDZrr_Intk:
- case X86::VMULSDZrr_Intkz:
- case X86::VSQRTSDZr_Intk:
- case X86::VSQRTSDZr_Intkz:
- case X86::VSUBSDZrr_Intk:
- case X86::VSUBSDZrr_Intkz:
+ case X86::VADDSDZrrk_Int:
+ case X86::VADDSDZrrkz_Int:
+ case X86::VCMPSDZrrik_Int:
+ case X86::VDIVSDZrrk_Int:
+ case X86::VDIVSDZrrkz_Int:
+ case X86::VMAXSDZrrk_Int:
+ case X86::VMAXSDZrrkz_Int:
+ case X86::VMINSDZrrk_Int:
+ case X86::VMINSDZrrkz_Int:
+ case X86::VMULSDZrrk_Int:
+ case X86::VMULSDZrrkz_Int:
+ case X86::VSQRTSDZrk_Int:
+ case X86::VSQRTSDZrkz_Int:
+ case X86::VSUBSDZrrk_Int:
+ case X86::VSUBSDZrrkz_Int:
case X86::VFMADDSD4rr_Int:
case X86::VFNMADDSD4rr_Int:
case X86::VFMSUBSD4rr_Int:
@@ -7912,30 +7912,30 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VFNMSUB213SDZr_Int:
case X86::VFMSUB231SDZr_Int:
case X86::VFNMSUB231SDZr_Int:
- case X86::VFMADD132SDZr_Intk:
- case X86::VFNMADD132SDZr_Intk:
- case X86::VFMADD213SDZr_Intk:
- case X86::VFNMADD213SDZr_Intk:
- case X86::VFMADD231SDZr_Intk:
- case X86::VFNMADD231SDZr_Intk:
- case X86::VFMSUB132SDZr_Intk:
- case X86::VFNMSUB132SDZr_Intk:
- case X86::VFMSUB213SDZr_Intk:
- case X86::VFNMSUB213SDZr_Intk:
- case X86::VFMSUB231SDZr_Intk:
- case X86::VFNMSUB231SDZr_Intk:
- case X86::VFMADD132SDZr_Intkz:
- case X86::VFNMADD132SDZr_Intkz:
- case X86::VFMADD213SDZr_Intkz:
- case X86::VFNMADD213SDZr_Intkz:
- case X86::VFMADD231SDZr_Intkz:
- case X86::VFNMADD231SDZr_Intkz:
- case X86::VFMSUB132SDZr_Intkz:
- case X86::VFNMSUB132SDZr_Intkz:
- case X86::VFMSUB213SDZr_Intkz:
- case X86::VFNMSUB213SDZr_Intkz:
- case X86::VFMSUB231SDZr_Intkz:
- case X86::VFNMSUB231SDZr_Intkz:
+ case X86::VFMADD132SDZrk_Int:
+ case X86::VFNMADD132SDZrk_Int:
+ case X86::VFMADD213SDZrk_Int:
+ case X86::VFNMADD213SDZrk_Int:
+ case X86::VFMADD231SDZrk_Int:
+ case X86::VFNMADD231SDZrk_Int:
+ case X86::VFMSUB132SDZrk_Int:
+ case X86::VFNMSUB132SDZrk_Int:
+ case X86::VFMSUB213SDZrk_Int:
+ case X86::VFNMSUB213SDZrk_Int:
+ case X86::VFMSUB231SDZrk_Int:
+ case X86::VFNMSUB231SDZrk_Int:
+ case X86::VFMADD132SDZrkz_Int:
+ case X86::VFNMADD132SDZrkz_Int:
+ case X86::VFMADD213SDZrkz_Int:
+ case X86::VFNMADD213SDZrkz_Int:
+ case X86::VFMADD231SDZrkz_Int:
+ case X86::VFNMADD231SDZrkz_Int:
+ case X86::VFMSUB132SDZrkz_Int:
+ case X86::VFNMSUB132SDZrkz_Int:
+ case X86::VFMSUB213SDZrkz_Int:
+ case X86::VFNMSUB213SDZrkz_Int:
+ case X86::VFMSUB231SDZrkz_Int:
+ case X86::VFNMSUB231SDZrkz_Int:
case X86::VFIXUPIMMSDZrri:
case X86::VFIXUPIMMSDZrrik:
case X86::VFIXUPIMMSDZrrikz:
@@ -7960,8 +7960,8 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VREDUCESDZrrik:
case X86::VREDUCESDZrrikz:
case X86::VRNDSCALESDZrri_Int:
- case X86::VRNDSCALESDZrri_Intk:
- case X86::VRNDSCALESDZrri_Intkz:
+ case X86::VRNDSCALESDZrrik_Int:
+ case X86::VRNDSCALESDZrrikz_Int:
case X86::VRSQRT14SDZrr:
case X86::VRSQRT14SDZrrk:
case X86::VRSQRT14SDZrrkz:
@@ -7989,19 +7989,19 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VMINSHZrr_Int:
case X86::VMULSHZrr_Int:
case X86::VSUBSHZrr_Int:
- case X86::VADDSHZrr_Intk:
- case X86::VADDSHZrr_Intkz:
- case X86::VCMPSHZrri_Intk:
- case X86::VDIVSHZrr_Intk:
- case X86::VDIVSHZrr_Intkz:
- case X86::VMAXSHZrr_Intk:
- case X86::VMAXSHZrr_Intkz:
- case X86::VMINSHZrr_Intk:
- case X86::VMINSHZrr_Intkz:
- case X86::VMULSHZrr_Intk:
- case X86::VMULSHZrr_Intkz:
- case X86::VSUBSHZrr_Intk:
- case X86::VSUBSHZrr_Intkz:
+ case X86::VADDSHZrrk_Int:
+ case X86::VADDSHZrrkz_Int:
+ case X86::VCMPSHZrrik_Int:
+ case X86::VDIVSHZrrk_Int:
+ case X86::VDIVSHZrrkz_Int:
+ case X86::VMAXSHZrrk_Int:
+ case X86::VMAXSHZrrkz_Int:
+ case X86::VMINSHZrrk_Int:
+ case X86::VMINSHZrrkz_Int:
+ case X86::VMULSHZrrk_Int:
+ case X86::VMULSHZrrkz_Int:
+ case X86::VSUBSHZrrk_Int:
+ case X86::VSUBSHZrrkz_Int:
case X86::VFMADD132SHZr_Int:
case X86::VFNMADD132SHZr_Int:
case X86::VFMADD213SHZr_Int:
@@ -8014,30 +8014,30 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VFNMSUB213SHZr_Int:
case X86::VFMSUB231SHZr_Int:
case X86::VFNMSUB231SHZr_Int:
- case X86::VFMADD132SHZr_Intk:
- case X86::VFNMADD132SHZr_Intk:
- case X86::VFMADD213SHZr_Intk:
- case X86::VFNMADD213SHZr_Intk:
- case X86::VFMADD231SHZr_Intk:
- case X86::VFNMADD231SHZr_Intk:
- case X86::VFMSUB132SHZr_Intk:
- case X86::VFNMSUB132SHZr_Intk:
- case X86::VFMSUB213SHZr_Intk:
- case X86::VFNMSUB213SHZr_Intk:
- case X86::VFMSUB231SHZr_Intk:
- case X86::VFNMSUB231SHZr_Intk:
- case X86::VFMADD132SHZr_Intkz:
- case X86::VFNMADD132SHZr_Intkz:
- case X86::VFMADD213SHZr_Intkz:
- case X86::VFNMADD213SHZr_Intkz:
- case X86::VFMADD231SHZr_Intkz:
- case X86::VFNMADD231SHZr_Intkz:
- case X86::VFMSUB132SHZr_Intkz:
- case X86::VFNMSUB132SHZr_Intkz:
- case X86::VFMSUB213SHZr_Intkz:
- case X86::VFNMSUB213SHZr_Intkz:
- case X86::VFMSUB231SHZr_Intkz:
- case X86::VFNMSUB231SHZr_Intkz:
+ case X86::VFMADD132SHZrk_Int:
+ case X86::VFNMADD132SHZrk_Int:
+ case X86::VFMADD213SHZrk_Int:
+ case X86::VFNMADD213SHZrk_Int:
+ case X86::VFMADD231SHZrk_Int:
+ case X86::VFNMADD231SHZrk_Int:
+ case X86::VFMSUB132SHZrk_Int:
+ case X86::VFNMSUB132SHZrk_Int:
+ case X86::VFMSUB213SHZrk_Int:
+ case X86::VFNMSUB213SHZrk_Int:
+ case X86::VFMSUB231SHZrk_Int:
+ case X86::VFNMSUB231SHZrk_Int:
+ case X86::VFMADD132SHZrkz_Int:
+ case X86::VFNMADD132SHZrkz_Int:
+ case X86::VFMADD213SHZrkz_Int:
+ case X86::VFNMADD213SHZrkz_Int:
+ case X86::VFMADD231SHZrkz_Int:
+ case X86::VFNMADD231SHZrkz_Int:
+ case X86::VFMSUB132SHZrkz_Int:
+ case X86::VFNMSUB132SHZrkz_Int:
+ case X86::VFMSUB213SHZrkz_Int:
+ case X86::VFNMSUB213SHZrkz_Int:
+ case X86::VFMSUB231SHZrkz_Int:
+ case X86::VFNMSUB231SHZrkz_Int:
return false;
default:
return true;
@@ -9489,25 +9489,25 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
case X86::VDIVSDZrm:
case X86::VDIVSDZrr:
case X86::VDIVSDZrm_Int:
- case X86::VDIVSDZrm_Intk:
- case X86::VDIVSDZrm_Intkz:
+ case X86::VDIVSDZrmk_Int:
+ case X86::VDIVSDZrmkz_Int:
case X86::VDIVSDZrr_Int:
- case X86::VDIVSDZrr_Intk:
- case X86::VDIVSDZrr_Intkz:
+ case X86::VDIVSDZrrk_Int:
+ case X86::VDIVSDZrrkz_Int:
case X86::VDIVSDZrrb_Int:
- case X86::VDIVSDZrrb_Intk:
- case X86::VDIVSDZrrb_Intkz:
+ case X86::VDIVSDZrrbk_Int:
+ case X86::VDIVSDZrrbkz_Int:
case X86::VDIVSSZrm:
case X86::VDIVSSZrr:
case X86::VDIVSSZrm_Int:
- case X86::VDIVSSZrm_Intk:
- case X86::VDIVSSZrm_Intkz:
+ case X86::VDIVSSZrmk_Int:
+ case X86::VDIVSSZrmkz_Int:
case X86::VDIVSSZrr_Int:
- case X86::VDIVSSZrr_Intk:
- case X86::VDIVSSZrr_Intkz:
+ case X86::VDIVSSZrrk_Int:
+ case X86::VDIVSSZrrkz_Int:
case X86::VDIVSSZrrb_Int:
- case X86::VDIVSSZrrb_Intk:
- case X86::VDIVSSZrrb_Intkz:
+ case X86::VDIVSSZrrbk_Int:
+ case X86::VDIVSSZrrbkz_Int:
case X86::VSQRTPDZ128m:
case X86::VSQRTPDZ128mb:
case X86::VSQRTPDZ128mbk:
@@ -9570,26 +9570,26 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
case X86::VSQRTPSZrkz:
case X86::VSQRTSDZm:
case X86::VSQRTSDZm_Int:
- case X86::VSQRTSDZm_Intk:
- case X86::VSQRTSDZm_Intkz:
+ case X86::VSQRTSDZmk_Int:
+ case X86::VSQRTSDZmkz_Int:
case X86::VSQRTSDZr:
case X86::VSQRTSDZr_Int:
- case X86::VSQRTSDZr_Intk:
- case X86::VSQRTSDZr_Intkz:
+ case X86::VSQRTSDZrk_Int:
+ case X86::VSQRTSDZrkz_Int:
case X86::VSQRTSDZrb_Int:
- case X86::VSQRTSDZrb_Intk:
- case X86::VSQRTSDZrb_Intkz:
+ case X86::VSQRTSDZrbk_Int:
+ case X86::VSQRTSDZrbkz_Int:
case X86::VSQRTSSZm:
case X86::VSQRTSSZm_Int:
- case X86::VSQRTSSZm_Intk:
- case X86::VSQRTSSZm_Intkz:
+ case X86::VSQRTSSZmk_Int:
+ case X86::VSQRTSSZmkz_Int:
case X86::VSQRTSSZr:
case X86::VSQRTSSZr_Int:
- case X86::VSQRTSSZr_Intk:
- case X86::VSQRTSSZr_Intkz:
+ case X86::VSQRTSSZrk_Int:
+ case X86::VSQRTSSZrkz_Int:
case X86::VSQRTSSZrb_Int:
- case X86::VSQRTSSZrb_Intk:
- case X86::VSQRTSSZrb_Intkz:
+ case X86::VSQRTSSZrbk_Int:
+ case X86::VSQRTSSZrbkz_Int:
case X86::VGATHERDPDYrm:
case X86::VGATHERDPDZ128rm:
diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
index 3b370d8..64728a2 100644
--- a/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
+++ b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
@@ -57,8 +57,6 @@ char X86LoadValueInjectionRetHardeningPass::ID = 0;
bool X86LoadValueInjectionRetHardeningPass::runOnMachineFunction(
MachineFunction &MF) {
- LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
- << " *****\n");
const X86Subtarget *Subtarget = &MF.getSubtarget<X86Subtarget>();
if (!Subtarget->useLVIControlFlowIntegrity() || !Subtarget->is64Bit())
return false; // FIXME: support 32-bit
@@ -68,6 +66,8 @@ bool X86LoadValueInjectionRetHardeningPass::runOnMachineFunction(
if (!F.hasOptNone() && skipFunction(F))
return false;
+ LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
+ << " *****\n");
++NumFunctionsConsidered;
const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
const X86InstrInfo *TII = Subtarget->getInstrInfo();
diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
index e04ff68..4f0d366 100644
--- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td
+++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
@@ -669,7 +669,7 @@ def : InstRW<[SPRWriteResGroup12], (instregex "^ADD_F(P?)rST0$",
"^VALIGN(D|Q)Z256rri((k|kz)?)$",
"^VCMPP(D|H|S)Z(128|256)rri(k?)$",
"^VCMPS(D|H|S)Zrri$",
- "^VCMPS(D|H|S)Zrr(b?)i_Int(k?)$",
+ "^VCMPS(D|H|S)Zrr(b?)i(k?)_Int$",
"^VFPCLASSP(D|H|S)Z(128|256)ri(k?)$",
"^VFPCLASSS(D|H|S)Zri(k?)$",
"^VPACK(S|U)S(DW|WB)Yrr$",
@@ -977,7 +977,7 @@ def SPRWriteResGroup49 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> {
let NumMicroOps = 2;
}
def : InstRW<[SPRWriteResGroup49], (instregex "^DIV_F(32|64)m$")>;
-def : InstRW<[SPRWriteResGroup49, ReadAfterVecLd], (instregex "^VSQRTSHZm_Int((k|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup49, ReadAfterVecLd], (instregex "^VSQRTSHZm((k|kz)?)_Int$")>;
def : InstRW<[SPRWriteResGroup49, ReadAfterVecLd], (instrs VSQRTSHZm)>;
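The scheduling regexes change mechanically: the optional mask letters move inside the name, ahead of the trailing "_Int". A sketch of what the updated VSQRTSH entry matches:

// Sketch: "^VSQRTSHZm((k|kz)?)_Int$" covers
//   VSQRTSHZm_Int, VSQRTSHZmk_Int, VSQRTSHZmkz_Int,
// where the old "^VSQRTSHZm_Int((k|kz)?)$" covered
//   VSQRTSHZm_Int, VSQRTSHZm_Intk, VSQRTSHZm_Intkz.
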
def SPRWriteResGroup50 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> {
@@ -1166,11 +1166,11 @@ def : InstRW<[SPRWriteResGroup73, ReadAfterVecXLd], (instregex "^(V?)GF2P8AFFINE
def : InstRW<[SPRWriteResGroup73, ReadAfterVecXLd], (instrs VGETEXPPHZ128mbkz,
VGF2P8MULBZ128rm)>;
def : InstRW<[SPRWriteResGroup73, ReadAfterVecLd], (instregex "^V(ADD|SUB)SHZrm$",
- "^V(ADD|SUB)SHZrm_Int((k|kz)?)$",
+ "^V(ADD|SUB)SHZrm((k|kz)?)_Int$",
"^VCVTSH2SSZrm((_Int)?)$",
"^VM(AX|IN)CSHZrm$",
"^VM(AX|IN|UL)SHZrm$",
- "^VM(AX|IN|UL)SHZrm_Int((k|kz)?)$")>;
+ "^VM(AX|IN|UL)SHZrm((k|kz)?)_Int$")>;
def : InstRW<[SPRWriteResGroup73, ReadAfterVecYLd], (instregex "^VGF2P8AFFINE((INV)?)QBYrmi$",
"^VGF2P8AFFINE((INV)?)QBZ256rm(b?)i$",
"^VGF2P8MULB(Y|Z256)rm$")>;
@@ -1181,7 +1181,7 @@ def : InstRW<[SPRWriteResGroup73, ReadAfterVecXLd, ReadAfterVecXLd], (instregex
"^VFMSUBADD(132|213|231)PHZ128m((b|k|bk|kz)?)$",
"^VFMSUBADD(132|213|231)PHZ128mbkz$")>;
def : InstRW<[SPRWriteResGroup73, ReadAfterVecLd, ReadAfterVecLd], (instregex "^VF(N?)M(ADD|SUB)(132|213|231)SHZm$",
- "^VF(N?)M(ADD|SUB)(132|213|231)SHZm_Int((k|kz)?)$")>;
+ "^VF(N?)M(ADD|SUB)(132|213|231)SHZm((k|kz)?)_Int$")>;
def : InstRW<[SPRWriteResGroup73, ReadAfterVecYLd, ReadAfterVecYLd], (instregex "^VPMADD52(H|L)UQZ256m((b|k|bk|kz)?)$",
"^VPMADD52(H|L)UQZ256mbkz$")>;
@@ -2301,7 +2301,7 @@ def : InstRW<[SPRWriteResGroup218, ReadAfterVecXLd], (instregex "^(V?)ROUNDS(D|S
"^VRNDSCALEP(D|S)Z128rmbik(z?)$",
"^VRNDSCALEP(D|S)Z128rmi((kz)?)$",
"^VRNDSCALES(D|S)Zrmi$",
- "^VRNDSCALES(D|S)Zrmi_Int((k|kz)?)$")>;
+ "^VRNDSCALES(D|S)Zrmi((k|kz)?)_Int$")>;
def SPRWriteResGroup219 : SchedWriteRes<[SPRPort00_01]> {
let ReleaseAtCycles = [2];
@@ -2313,7 +2313,7 @@ def : InstRW<[SPRWriteResGroup219], (instregex "^(V?)ROUND(PD|SS)ri$",
"^(V?)ROUNDS(D|S)ri_Int$",
"^VRNDSCALEP(D|S)Z(128|256)rri((k|kz)?)$",
"^VRNDSCALES(D|S)Zrri$",
- "^VRNDSCALES(D|S)Zrri(b?)_Int((k|kz)?)$",
+ "^VRNDSCALES(D|S)Zrri(b?)((k|kz)?)_Int$",
"^VROUNDP(D|S)Yri$")>;
def SPRWriteResGroup220 : SchedWriteRes<[SPRPort00_06]> {
@@ -2530,7 +2530,7 @@ def SPRWriteResGroup249 : SchedWriteRes<[SPRPort01_05]> {
let Latency = 4;
}
def : InstRW<[SPRWriteResGroup249], (instregex "^V(ADD|SUB)P(D|S)Z(128|256)rrkz$",
- "^V(ADD|SUB)S(D|S)Zrr(b?)_Intkz$")>;
+ "^V(ADD|SUB)S(D|S)Zrr(b?)kz_Int$")>;
def SPRWriteResGroup250 : SchedWriteRes<[SPRPort00_05]> {
let Latency = 3;
@@ -2545,11 +2545,11 @@ def SPRWriteResGroup251 : SchedWriteRes<[SPRPort00_01]> {
let Latency = 6;
}
def : InstRW<[SPRWriteResGroup251], (instregex "^V(ADD|SUB)PHZ(128|256)rrk(z?)$",
- "^V(ADD|SUB)SHZrr(b?)_Intk(z?)$",
+ "^V(ADD|SUB)SHZrr(b?)k(z?)_Int$",
"^VCVT(T?)PH2(U?)WZ(128|256)rrk(z?)$",
"^VCVT(U?)W2PHZ(128|256)rrk(z?)$",
"^VF(N?)M(ADD|SUB)(132|213|231)PHZ(128|256)rk(z?)$",
- "^VF(N?)M(ADD|SUB)(132|213|231)SHZr(b?)_Intk(z?)$",
+ "^VF(N?)M(ADD|SUB)(132|213|231)SHZr(b?)k(z?)_Int$",
"^VFMADDSUB(132|213|231)PHZ(128|256)rk(z?)$",
"^VFMSUBADD(132|213|231)PHZ(128|256)rk(z?)$",
"^VGETEXPPHZ(128|256)rk(z?)$",
@@ -2560,7 +2560,7 @@ def : InstRW<[SPRWriteResGroup251], (instregex "^V(ADD|SUB)PHZ(128|256)rrk(z?)$"
"^VGETMANTSHZrri(k|bkz)$",
"^VM(AX|IN)CPHZ(128|256)rrk(z?)$",
"^VM(AX|IN|UL)PHZ(128|256)rrk(z?)$",
- "^VM(AX|IN|UL)SHZrr(b?)_Intk(z?)$")>;
+ "^VM(AX|IN|UL)SHZrr(b?)k(z?)_Int$")>;
def SPRWriteResGroup252 : SchedWriteRes<[SPRPort00]> {
let Latency = 5;
@@ -2745,7 +2745,7 @@ def : InstRW<[SPRWriteResGroup263, ReadAfterVecYLd], (instregex "^VCMPP(D|H|S)Z(
"^VPTEST(N?)M(B|D|Q|W)Z((256)?)rm(k?)$",
"^VPTEST(N?)M(D|Q)Z((256)?)rmb(k?)$")>;
def : InstRW<[SPRWriteResGroup263, ReadAfterVecLd], (instregex "^VCMPS(D|H|S)Zrmi$",
- "^VCMPS(D|H|S)Zrmi_Int(k?)$",
+ "^VCMPS(D|H|S)Zrmi(k?)_Int$",
"^VFPCLASSS(D|H|S)Zmik$")>;
def SPRWriteResGroup264 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> {
@@ -3171,7 +3171,7 @@ def : InstRW<[SPRWriteResGroup314], (instregex "^VCVT(T?)PD2(U?)QQZ(128|256)rr((
"^VPLZCNT(D|Q)Z(128|256)rr((k|kz)?)$",
"^VPMADD52(H|L)UQZ(128|256)r((k|kz)?)$",
"^VSCALEFS(D|S)Zrr((k|kz)?)$",
- "^VSCALEFS(D|S)Zrrb_Int((k|kz)?)$")>;
+ "^VSCALEFS(D|S)Zrrb((k|kz)?)_Int$")>;
def : InstRW<[SPRWriteResGroup314, ReadAfterVecLd], (instregex "^VFIXUPIMMS(D|S)Zrrib((k|kz)?)$")>;
def SPRWriteResGroup315 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> {
@@ -3300,7 +3300,7 @@ def SPRWriteResGroup331 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> {
let NumMicroOps = 2;
}
def : InstRW<[SPRWriteResGroup331], (instregex "^VCVTPH2PSZ(128|256)rmk(z?)$")>;
-def : InstRW<[SPRWriteResGroup331, ReadAfterVecLd], (instregex "^VCVTSH2SSZrm_Intk(z?)$")>;
+def : InstRW<[SPRWriteResGroup331, ReadAfterVecLd], (instregex "^VCVTSH2SSZrmk(z?)_Int$")>;
def : InstRW<[SPRWriteResGroup331, ReadAfterVecXLd], (instregex "^VPMADDUBSWZ128rmk(z?)$",
"^VPMULH((U|RS)?)WZ128rmk(z?)$",
"^VPMULLWZ128rmk(z?)$")>;
@@ -3460,7 +3460,7 @@ def SPRWriteResGroup353 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0
let Latency = 21;
let NumMicroOps = 7;
}
-def : InstRW<[SPRWriteResGroup353, ReadAfterVecLd], (instregex "^VCVTSD2SHZrm_Intk(z?)$")>;
+def : InstRW<[SPRWriteResGroup353, ReadAfterVecLd], (instregex "^VCVTSD2SHZrmk(z?)_Int$")>;
def SPRWriteResGroup354 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort05]> {
let ReleaseAtCycles = [2, 1, 1];
@@ -3475,7 +3475,7 @@ def SPRWriteResGroup355 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0
let Latency = 14;
let NumMicroOps = 4;
}
-def : InstRW<[SPRWriteResGroup355], (instregex "^VCVTSD2SHZrr(b?)_Intk(z?)$")>;
+def : InstRW<[SPRWriteResGroup355], (instregex "^VCVTSD2SHZrr(b?)k(z?)_Int$")>;
def SPRWriteResGroup356 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> {
let ReleaseAtCycles = [2, 1, 1];
@@ -3489,7 +3489,7 @@ def SPRWriteResGroup357 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort0
let Latency = 20;
let NumMicroOps = 4;
}
-def : InstRW<[SPRWriteResGroup357, ReadAfterVecLd], (instregex "^VCVTSH2SDZrm_Intk(z?)$")>;
+def : InstRW<[SPRWriteResGroup357, ReadAfterVecLd], (instregex "^VCVTSH2SDZrmk(z?)_Int$")>;
def SPRWriteResGroup358 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
let ReleaseAtCycles = [2, 1];
@@ -3504,7 +3504,7 @@ def SPRWriteResGroup359 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
let Latency = 13;
let NumMicroOps = 3;
}
-def : InstRW<[SPRWriteResGroup359], (instregex "^VCVTSH2SDZrr(b?)_Intk(z?)$")>;
+def : InstRW<[SPRWriteResGroup359], (instregex "^VCVTSH2SDZrr(b?)k(z?)_Int$")>;
def SPRWriteResGroup360 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort02_03_10]> {
let Latency = 13;
@@ -3523,7 +3523,7 @@ def : InstRW<[SPRWriteResGroup361], (instregex "^VCVT(T?)SH2(U?)SI((64)?)Zrr(b?)
def SPRWriteResGroup362 : SchedWriteRes<[SPRPort00_01]> {
let Latency = 8;
}
-def : InstRW<[SPRWriteResGroup362], (instregex "^VCVTSH2SSZrr(b?)_Intk(z?)$")>;
+def : InstRW<[SPRWriteResGroup362], (instregex "^VCVTSH2SSZrr(b?)k(z?)_Int$")>;
def SPRWriteResGroup363 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_10]> {
let Latency = 14;
@@ -3536,7 +3536,7 @@ def SPRWriteResGroup364 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0
let Latency = 16;
let NumMicroOps = 3;
}
-def : InstRW<[SPRWriteResGroup364, ReadAfterVecLd], (instregex "^VCVTSS2SHZrm_Intk(z?)$")>;
+def : InstRW<[SPRWriteResGroup364, ReadAfterVecLd], (instregex "^VCVTSS2SHZrmk(z?)_Int$")>;
def SPRWriteResGroup365 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05]> {
let Latency = 6;
@@ -3549,7 +3549,7 @@ def SPRWriteResGroup366 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05]> {
let Latency = 9;
let NumMicroOps = 2;
}
-def : InstRW<[SPRWriteResGroup366], (instregex "^VCVTSS2SHZrr(b?)_Intk(z?)$")>;
+def : InstRW<[SPRWriteResGroup366], (instregex "^VCVTSS2SHZrr(b?)k(z?)_Int$")>;
def SPRWriteResGroup367 : SchedWriteRes<[SPRPort05]> {
let Latency = 5;
@@ -3667,7 +3667,7 @@ def SPRWriteResGroup380 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> {
let Latency = 21;
let NumMicroOps = 2;
}
-def : InstRW<[SPRWriteResGroup380, ReadAfterVecLd], (instregex "^VDIVSHZrm_Int((k|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup380, ReadAfterVecLd], (instregex "^VDIVSHZrm((k|kz)?)_Int$")>;
def : InstRW<[SPRWriteResGroup380, ReadAfterVecLd], (instrs VDIVSHZrm)>;
def SPRWriteResGroup381 : SchedWriteRes<[SPRPort00]> {
@@ -4884,7 +4884,7 @@ def SPRWriteResGroup534 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> {
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup534, ReadAfterVecXLd], (instregex "^VRNDSCALEPHZ128rm(b?)ik(z?)$",
- "^VRNDSCALESHZrmi_Intk(z?)$",
+ "^VRNDSCALESHZrmik(z?)_Int$",
"^VSCALEFPHZ128rm(bk|kz)$",
"^VSCALEFPHZ128rm(k|bkz)$")>;
def : InstRW<[SPRWriteResGroup534, ReadAfterVecYLd], (instregex "^VRNDSCALEPHZ256rm(b?)ik(z?)$",
@@ -4898,9 +4898,9 @@ def SPRWriteResGroup535 : SchedWriteRes<[SPRPort00_01]> {
let NumMicroOps = 2;
}
def : InstRW<[SPRWriteResGroup535], (instregex "^VRNDSCALEPHZ(128|256)rrik(z?)$",
- "^VRNDSCALESHZrri(b?)_Intk(z?)$",
+ "^VRNDSCALESHZrri(b?)k(z?)_Int$",
"^VSCALEFPHZ(128|256)rrk(z?)$",
- "^VSCALEFSHZrrb_Intk(z?)$",
+ "^VSCALEFSHZrrbk(z?)_Int$",
"^VSCALEFSHZrrk(z?)$")>;
def SPRWriteResGroup536 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> {
@@ -4944,7 +4944,7 @@ def SPRWriteResGroup540 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> {
}
def : InstRW<[SPRWriteResGroup540, ReadAfterVecXLd], (instregex "^VSQRTPDZ128m(bk|kz)$",
"^VSQRTPDZ128m(k|bkz)$")>;
-def : InstRW<[SPRWriteResGroup540, ReadAfterVecLd], (instregex "^VSQRTSDZm_Intk(z?)$")>;
+def : InstRW<[SPRWriteResGroup540, ReadAfterVecLd], (instregex "^VSQRTSDZmk(z?)_Int$")>;
def SPRWriteResGroup541 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10]> {
let ReleaseAtCycles = [2, 1, 1];
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 38f9b5e..c5478dd 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -1545,7 +1545,7 @@ def Zn4WriteSCALErr: SchedWriteRes<[Zn4FPFMisc23]> {
let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteSCALErr], (instregex
- "V(SCALEF|REDUCE)(S|P)(S|D)(Z?|Z128?|Z256?)(rr|rrb|rrkz|rrik|rrikz|rri)(_Int?|_Intkz?)",
+ "V(SCALEF|REDUCE)(S|P)(S|D)(Z?|Z128?|Z256?)(rr|rrb|rrkz|rrik|rrikz|rri)(_Int?)",
"(V?)REDUCE(PD|PS|SD|SS)(Z?|Z128?)(rri|rrikz|rrib)"
)>;
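After the rename the old "_Intkz?" alternative is dead: the names it used to match no longer exist. A sketch (assuming the pre-rename VSCALEF names implied by the SapphireRapids hunks above):

// Sketch: the `_Intkz?` branch used to match names like VSCALEFSSZrrb_Intkz;
// after the rename that instruction is VSCALEFSSZrrbkz_Int, so the branch
// can no longer match anything and is dropped.
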
@@ -1585,7 +1585,7 @@ def : InstRW<[Zn4WriteSHIFTrr], (instregex
"(V?)P(ROL|ROR)(D|Q|VD|VQ)(Z?|Z128?|Z256?)(rr|rrk|rrkz)",
"(V?)P(ROL|ROR)(D|Q|VD|VQ)(Z256?)(ri|rik|rikz)",
"(V?)P(ROL|ROR)(D|Q)(Z?|Z128?)(ri|rik|rikz)",
- "VPSHUFBITQMBZ128rr", "VFMSUB231SSZr_Intkz"
+ "VPSHUFBITQMBZ128rr", "VFMSUB231SSZrkz_Int"
)>;
def Zn4WriteSHIFTri: SchedWriteRes<[Zn4FPFMisc01]> {
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 808f48e..c19bcfc 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1650,6 +1650,13 @@ InstructionCost X86TTIImpl::getShuffleCost(
return MatchingTypes ? TTI::TCC_Free : SubLT.first;
}
+ // Attempt to match MOVSS (Idx == 0) or INSERTPS pattern. This will have
+ // been matched by improveShuffleKindFromMask as a SK_InsertSubvector of
+ // v1f32 (legalised to f32) into a v4f32.
+ if (LT.first == 1 && LT.second == MVT::v4f32 && SubLT.first == 1 &&
+ SubLT.second == MVT::f32 && (Index == 0 || ST->hasSSE41()))
+ return 1;
+
// If the insertion isn't aligned, treat it like a 2-op shuffle.
Kind = TTI::SK_PermuteTwoSrc;
}
@@ -1698,8 +1705,7 @@ InstructionCost X86TTIImpl::getShuffleCost(
// We are going to permute multiple sources and the result will be in multiple
// destinations. Providing an accurate cost only for splits where the element
// type remains the same.
- if ((Kind == TTI::SK_PermuteSingleSrc || Kind == TTI::SK_PermuteTwoSrc) &&
- LT.first != 1) {
+ if (LT.first != 1) {
MVT LegalVT = LT.second;
if (LegalVT.isVector() &&
LegalVT.getVectorElementType().getSizeInBits() ==
@@ -2227,9 +2233,18 @@ InstructionCost X86TTIImpl::getShuffleCost(
{ TTI::SK_PermuteTwoSrc, MVT::v4f32, 2 }, // 2*shufps
};
- if (ST->hasSSE1())
+ if (ST->hasSSE1()) {
+ if (LT.first == 1 && LT.second == MVT::v4f32 && Mask.size() == 4) {
+ // SHUFPS: both pairs must come from the same source register.
+ auto MatchSHUFPS = [](int X, int Y) {
+ return X < 0 || Y < 0 || ((X & 4) == (Y & 4));
+ };
+ if (MatchSHUFPS(Mask[0], Mask[1]) && MatchSHUFPS(Mask[2], Mask[3]))
+ return 1;
+ }
if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
+ }
return BaseT::getShuffleCost(Kind, BaseTp, Mask, CostKind, Index, SubTp);
}
@@ -4789,9 +4804,12 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
MVT MScalarTy = LT.second.getScalarType();
auto IsCheapPInsrPExtrInsertPS = [&]() {
// Assume pinsr/pextr XMM <-> GPR is relatively cheap on all targets.
+ // Inserting f32 into index0 is just movss.
// Also, assume insertps is relatively cheap on all >= SSE41 targets.
return (MScalarTy == MVT::i16 && ST->hasSSE2()) ||
(MScalarTy.isInteger() && ST->hasSSE41()) ||
+ (MScalarTy == MVT::f32 && ST->hasSSE1() && Index == 0 &&
+ Opcode == Instruction::InsertElement) ||
(MScalarTy == MVT::f32 && ST->hasSSE41() &&
Opcode == Instruction::InsertElement);
};