aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td32
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h18
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h1
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp5
6 files changed, 38 insertions, 28 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 5c88e3f..67175fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -90,7 +90,7 @@ def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
"UnalignedBufferAccess",
"true",
- "Support unaligned global loads and stores"
+ "Hardware supports unaligned global loads and stores"
>;
def FeatureTrapHandler: SubtargetFeature<"trap-handler",
@@ -105,18 +105,10 @@ def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
"Support unaligned scratch loads and stores"
>;
-// LDS alignment enforcement is controlled by a configuration register:
-// SH_MEM_CONFIG.alignment_mode
-def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode",
- "UnalignedAccessMode",
- "true",
- "Support unaligned local and region loads and stores"
->;
-
def FeatureUnalignedDSAccess : SubtargetFeature<"unaligned-ds-access",
"UnalignedDSAccess",
"true",
- "Does not requires 16 byte alignment for certain local and region loads and stores"
+ "Hardware supports unaligned local and region loads and stores"
>;
def FeatureApertureRegs : SubtargetFeature<"aperture-regs",
@@ -653,6 +645,15 @@ def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range",
"Requires use of fract on arguments to trig instructions"
>;
+// Alignment enforcement is controlled by a configuration register:
+// SH_MEM_CONFIG.alignment_mode
+def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode",
+ "UnalignedAccessMode",
+ "true",
+ "Enable unaligned global, local and region loads and stores if the hardware"
+ " supports it"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
@@ -679,7 +680,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
FeatureWavefrontSize64, FeatureFlatAddressSpace,
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
- FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC]
+ FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC,
+ FeatureUnalignedBufferAccess]
>;
def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
@@ -692,7 +694,8 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts,
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
- FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC, FeatureFastDenormalF32
+ FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC, FeatureFastDenormalF32,
+ FeatureUnalignedBufferAccess
]
>;
@@ -709,7 +712,8 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts,
- FeatureFastDenormalF32, FeatureUnalignedDSAccess
+ FeatureFastDenormalF32, FeatureUnalignedBufferAccess,
+ FeatureUnalignedDSAccess
]
>;
@@ -728,7 +732,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureVOP3Literal, FeatureDPP8,
FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC,
FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16,
- FeatureUnalignedDSAccess
+ FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess
]
>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 20262f0..17ac0c0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1051,9 +1051,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
return false;
};
- unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32;
- unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16;
- unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8;
+ unsigned GlobalAlign32 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 32;
+ unsigned GlobalAlign16 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 16;
+ unsigned GlobalAlign8 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 8;
// TODO: Refine based on subtargets which support unaligned access or 128-bit
// LDS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 4968c5c..3698881 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -81,7 +81,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,+sram-ecc,+xnack,");
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
- FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
+ FullFS += "+flat-for-global,+unaligned-access-mode,+trap-handler,";
FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
@@ -186,7 +186,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
AutoWaitcntBeforeBarrier(false),
CodeObjectV3(false),
UnalignedScratchAccess(false),
- UnalignedBufferAccess(false),
UnalignedAccessMode(false),
HasApertureRegs(false),
@@ -258,6 +257,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasUnpackedD16VMem(false),
LDSMisalignedBug(false),
HasMFMAInlineLiteralBug(false),
+ UnalignedBufferAccess(false),
UnalignedDSAccess(false),
ScalarizeGlobal(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 20f0658..a0ab287 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -314,7 +314,6 @@ protected:
bool AutoWaitcntBeforeBarrier;
bool CodeObjectV3;
bool UnalignedScratchAccess;
- bool UnalignedBufferAccess;
bool UnalignedAccessMode;
bool HasApertureRegs;
bool EnableXNACK;
@@ -395,6 +394,7 @@ protected:
bool HasMFMAInlineLiteralBug;
bool HasVertexCache;
short TexVTXClauseSize;
+ bool UnalignedBufferAccess;
bool UnalignedDSAccess;
bool ScalarizeGlobal;
@@ -697,6 +697,18 @@ public:
return UnalignedBufferAccess;
}
+ bool hasUnalignedBufferAccessEnabled() const {
+ return UnalignedBufferAccess && UnalignedAccessMode;
+ }
+
+ bool hasUnalignedDSAccess() const {
+ return UnalignedDSAccess;
+ }
+
+ bool hasUnalignedDSAccessEnabled() const {
+ return UnalignedDSAccess && UnalignedAccessMode;
+ }
+
bool hasUnalignedScratchAccess() const {
return UnalignedScratchAccess;
}
@@ -705,10 +717,6 @@ public:
return UnalignedAccessMode;
}
- bool hasUnalignedDSAccess() const {
- return UnalignedDSAccess;
- }
-
bool hasApertureRegs() const {
return HasApertureRegs;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index aa3cc75..7929d5b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -88,7 +88,6 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
AMDGPU::FeatureFlatForGlobal,
AMDGPU::FeaturePromoteAlloca,
- AMDGPU::FeatureUnalignedBufferAccess,
AMDGPU::FeatureUnalignedScratchAccess,
AMDGPU::FeatureUnalignedAccessMode,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 2807db6..39c2ebc 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1398,8 +1398,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
AddrSpace == AMDGPUAS::REGION_ADDRESS) {
// Check if alignment requirements for ds_read/write instructions are
// disabled.
- if (Subtarget->hasUnalignedDSAccess() &&
- Subtarget->hasUnalignedAccessMode()) {
+ if (Subtarget->hasUnalignedDSAccessEnabled()) {
if (IsFast)
*IsFast = true;
return true;
@@ -1450,7 +1449,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
return AlignedBy4;
}
- if (Subtarget->hasUnalignedBufferAccess() &&
+ if (Subtarget->hasUnalignedBufferAccessEnabled() &&
!(AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
AddrSpace == AMDGPUAS::REGION_ADDRESS)) {
// If we have an uniform constant load, it still requires using a slow