aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
authorStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2025-08-07 12:59:50 -0700
committerGitHub <noreply@github.com>2025-08-07 12:59:50 -0700
commitd09dbdabb93ffdd6df25ae487c95a552f13e5e16 (patch)
tree1d74ca8b8acc501386a3aae07d1ab15f5e8e713f /llvm/lib
parentc43c1c0c45fc1ec3fab7abd6e19b318f6468bf28 (diff)
downloadllvm-d09dbdabb93ffdd6df25ae487c95a552f13e5e16.zip
llvm-d09dbdabb93ffdd6df25ae487c95a552f13e5e16.tar.gz
llvm-d09dbdabb93ffdd6df25ae487c95a552f13e5e16.tar.bz2
[AMDGPU] bf16 clamp folding (#152573)
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/SIFoldOperands.cpp10
1 files changed, 7 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 0c653b1..962c276 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -2081,7 +2081,9 @@ SIFoldOperandsImpl::isClamp(const MachineInstr &MI) const {
case AMDGPU::V_MAX_F16_fake16_e64:
case AMDGPU::V_MAX_F64_e64:
case AMDGPU::V_MAX_NUM_F64_e64:
- case AMDGPU::V_PK_MAX_F16: {
+ case AMDGPU::V_PK_MAX_F16:
+ case AMDGPU::V_MAX_BF16_PSEUDO_e64:
+ case AMDGPU::V_PK_MAX_NUM_BF16: {
if (MI.mayRaiseFPException())
return nullptr;
@@ -2108,8 +2110,10 @@ SIFoldOperandsImpl::isClamp(const MachineInstr &MI) const {
// Having a 0 op_sel_hi would require swizzling the output in the source
// instruction, which we can't do.
- unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1
- : 0u;
+ unsigned UnsetMods =
+ (Op == AMDGPU::V_PK_MAX_F16 || Op == AMDGPU::V_PK_MAX_NUM_BF16)
+ ? SISrcMods::OP_SEL_1
+ : 0u;
if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
return nullptr;
return Src0;