diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/GCNSubtarget.h | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SOPInstructions.td | 7 |
6 files changed, 28 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 1c8383c..54d94b1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1466,6 +1466,13 @@ def FeatureClusters : SubtargetFeature< "clusters", "Has clusters of workgroups support" >; +def FeatureWaitsBeforeSystemScopeStores : SubtargetFeature< + "waits-before-system-scope-stores", + "RequiresWaitsBeforeSystemScopeStores", + "true", + "Target requires waits for loads and atomics before system scope stores" +>; + // Dummy feature used to disable assembler instructions. def FeatureDisable : SubtargetFeature<"", "FeatureDisable","true", @@ -2060,7 +2067,8 @@ def FeatureISAVersion12 : FeatureSet< FeatureMaxHardClauseLength32, Feature1_5xVGPRs, FeatureMemoryAtomicFAddF32DenormalSupport, - FeatureBVHDualAndBVH8Insts + FeatureBVHDualAndBVH8Insts, + FeatureWaitsBeforeSystemScopeStores, ]>; def FeatureISAVersion12_50 : FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 5580e4c..09338c5 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -9028,6 +9028,9 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); + // Parse a dummy operand as a placeholder for the SWZ operand. This enforces + // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands. + Inst.addOperand(MCOperand::createImm(0)); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index ac660d5..f377b8a 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -290,6 +290,7 @@ protected: bool Has45BitNumRecordsBufferResource = false; bool HasClusters = false; + bool RequiresWaitsBeforeSystemScopeStores = false; // Dummy feature to use for assembler in tablegen. bool FeatureDisable = false; @@ -1861,6 +1862,10 @@ public: bool has45BitNumRecordsBufferResource() const { return Has45BitNumRecordsBufferResource; } + + bool requiresWaitsBeforeSystemScopeStores() const { + return RequiresWaitsBeforeSystemScopeStores; + } }; class GCNUserSGPRUsageInfo { diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index a177a42..6ab8d552 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -2673,7 +2673,8 @@ bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const { const unsigned Scope = CPol->getImm() & CPol::SCOPE; // GFX12.0 only: Extra waits needed before system scope stores. - if (!ST.hasGFX1250Insts() && !Atomic && Scope == CPol::SCOPE_SYS) + if (ST.requiresWaitsBeforeSystemScopeStores() && !Atomic && + Scope == CPol::SCOPE_SYS) Changed |= insertWaitsBeforeSystemScopeStore(MI.getIterator()); return Changed; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index ebd2e7e..d80a6f3 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1874,9 +1874,13 @@ void SIRegisterInfo::buildSpillLoadStore( } bool IsSrcDstDef = SrcDstRegState & RegState::Define; + bool PartialReloadCopy = (RemEltSize != EltSize) && !IsStore; if (NeedSuperRegImpOperand && - (IsFirstSubReg || (IsLastSubReg && !IsSrcDstDef))) + (IsFirstSubReg || (IsLastSubReg && !IsSrcDstDef))) { MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState); + if (PartialReloadCopy) + MIB.addReg(ValueReg, RegState::Implicit); + } // The epilog restore of a wwm-scratch register can cause undesired // optimization during machine-cp post PrologEpilogInserter if the same diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 84287b6..1931e0b 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -838,9 +838,10 @@ def S_CBRANCH_G_FORK : SOP2_Pseudo < let SubtargetPredicate = isGFX6GFX7GFX8GFX9; } -let Defs = [SCC] in { -def S_ABSDIFF_I32 : SOP2_32 <"s_absdiff_i32">; -} // End Defs = [SCC] +let isCommutable = 1, Defs = [SCC] in +def S_ABSDIFF_I32 : SOP2_32 <"s_absdiff_i32", + [(set i32:$sdst, (UniformUnaryFrag<abs> (sub_oneuse i32:$src0, i32:$src1)))] +>; let SubtargetPredicate = isGFX8GFX9 in { def S_RFE_RESTORE_B64 : SOP2_Pseudo < |
