diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 32 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 18 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 5 |
6 files changed, 38 insertions, 28 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 5c88e3f..67175fe 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -90,7 +90,7 @@ def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts", def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access", "UnalignedBufferAccess", "true", - "Support unaligned global loads and stores" + "Hardware supports unaligned global loads and stores" >; def FeatureTrapHandler: SubtargetFeature<"trap-handler", @@ -105,18 +105,10 @@ def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access", "Support unaligned scratch loads and stores" >; -// LDS alignment enforcement is controlled by a configuration register: -// SH_MEM_CONFIG.alignment_mode -def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode", - "UnalignedAccessMode", - "true", - "Support unaligned local and region loads and stores" ->; - def FeatureUnalignedDSAccess : SubtargetFeature<"unaligned-ds-access", "UnalignedDSAccess", "true", - "Does not requires 16 byte alignment for certain local and region loads and stores" + "Hardware supports unaligned local and region loads and stores" >; def FeatureApertureRegs : SubtargetFeature<"aperture-regs", @@ -653,6 +645,15 @@ def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range", "Requires use of fract on arguments to trig instructions" >; +// Alignment enforcement is controlled by a configuration register: +// SH_MEM_CONFIG.alignment_mode +def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode", + "UnalignedAccessMode", + "true", + "Enable unaligned global, local and region loads and stores if the hardware" + " supports it" +>; + // Dummy feature used to disable assembler instructions. def FeatureDisable : SubtargetFeature<"", "FeatureDisable","true", @@ -679,7 +680,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange, FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, - FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC] + FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC, + FeatureUnalignedBufferAccess] >; def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", @@ -692,7 +694,8 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP, FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, - FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC, FeatureFastDenormalF32 + FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC, FeatureFastDenormalF32, + FeatureUnalignedBufferAccess ] >; @@ -709,7 +712,8 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9", FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts, FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16, FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, - FeatureFastDenormalF32, FeatureUnalignedDSAccess + FeatureFastDenormalF32, FeatureUnalignedBufferAccess, + FeatureUnalignedDSAccess ] >; @@ -728,7 +732,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10", FeatureVOP3Literal, FeatureDPP8, FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC, FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16, - FeatureUnalignedDSAccess + FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess ] >; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 20262f0..17ac0c0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1051,9 +1051,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, return false; }; - unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32; - unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16; - unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8; + unsigned GlobalAlign32 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 32; + unsigned GlobalAlign16 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 16; + unsigned GlobalAlign8 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 8; // TODO: Refine based on subtargets which support unaligned access or 128-bit // LDS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 4968c5c..3698881 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -81,7 +81,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT, SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,+sram-ecc,+xnack,"); if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. - FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,"; + FullFS += "+flat-for-global,+unaligned-access-mode,+trap-handler,"; FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS @@ -186,7 +186,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, AutoWaitcntBeforeBarrier(false), CodeObjectV3(false), UnalignedScratchAccess(false), - UnalignedBufferAccess(false), UnalignedAccessMode(false), HasApertureRegs(false), @@ -258,6 +257,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, HasUnpackedD16VMem(false), LDSMisalignedBug(false), HasMFMAInlineLiteralBug(false), + UnalignedBufferAccess(false), UnalignedDSAccess(false), ScalarizeGlobal(false), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 20f0658..a0ab287 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -314,7 +314,6 @@ protected: bool AutoWaitcntBeforeBarrier; bool CodeObjectV3; bool UnalignedScratchAccess; - bool UnalignedBufferAccess; bool UnalignedAccessMode; bool HasApertureRegs; bool EnableXNACK; @@ -395,6 +394,7 @@ protected: bool HasMFMAInlineLiteralBug; bool HasVertexCache; short TexVTXClauseSize; + bool UnalignedBufferAccess; bool UnalignedDSAccess; bool ScalarizeGlobal; @@ -697,6 +697,18 @@ public: return UnalignedBufferAccess; } + bool hasUnalignedBufferAccessEnabled() const { + return UnalignedBufferAccess && UnalignedAccessMode; + } + + bool hasUnalignedDSAccess() const { + return UnalignedDSAccess; + } + + bool hasUnalignedDSAccessEnabled() const { + return UnalignedDSAccess && UnalignedAccessMode; + } + bool hasUnalignedScratchAccess() const { return UnalignedScratchAccess; } @@ -705,10 +717,6 @@ public: return UnalignedAccessMode; } - bool hasUnalignedDSAccess() const { - return UnalignedDSAccess; - } - bool hasApertureRegs() const { return HasApertureRegs; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index aa3cc75..7929d5b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -88,7 +88,6 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> { AMDGPU::FeatureEnableUnsafeDSOffsetFolding, AMDGPU::FeatureFlatForGlobal, AMDGPU::FeaturePromoteAlloca, - AMDGPU::FeatureUnalignedBufferAccess, AMDGPU::FeatureUnalignedScratchAccess, AMDGPU::FeatureUnalignedAccessMode, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 2807db6..39c2ebc 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1398,8 +1398,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl( AddrSpace == AMDGPUAS::REGION_ADDRESS) { // Check if alignment requirements for ds_read/write instructions are // disabled. - if (Subtarget->hasUnalignedDSAccess() && - Subtarget->hasUnalignedAccessMode()) { + if (Subtarget->hasUnalignedDSAccessEnabled()) { if (IsFast) *IsFast = true; return true; @@ -1450,7 +1449,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl( return AlignedBy4; } - if (Subtarget->hasUnalignedBufferAccess() && + if (Subtarget->hasUnalignedBufferAccessEnabled() && !(AddrSpace == AMDGPUAS::LOCAL_ADDRESS || AddrSpace == AMDGPUAS::REGION_ADDRESS)) { // If we have an uniform constant load, it still requires using a slow |
