about | summary | refs | log | tree | commit | diff
path: root/clang/lib/CodeGen
diff options
context:
space:
mode:
author: Jessica Del <50999226+OutOfCache@users.noreply.github.com> 2023-11-13 13:23:26 +0100
committer: GitHub <noreply@github.com> 2023-11-13 13:23:26 +0100
commit: b025864af8897bcc5791d93f5004cc1acbb397de (patch)
tree: 452e71f556d58a18a2c40b904859d148ea1de9ac /clang/lib/CodeGen
parent: 93ae26331592f41bf2b1d10b048743d80c468385 (diff)
download: llvm-b025864af8897bcc5791d93f5004cc1acbb397de.zip
llvm-b025864af8897bcc5791d93f5004cc1acbb397de.tar.gz
llvm-b025864af8897bcc5791d93f5004cc1acbb397de.tar.bz2
[AMDGPU] - Add clang builtins for tied WMMA intrinsics (#70669)
Add Clang builtins for the new tied WMMA intrinsics. These variants tie the destination accumulator matrix to the input accumulator matrix. See https://github.com/llvm/llvm-project/pull/69903 for context.
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- clang/lib/CodeGen/CGBuiltin.cpp | 14
1 file changed, 14 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a96bdf8..24fcf23 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18024,9 +18024,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
}
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
@@ -18064,6 +18068,16 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
ArgForMatchingRetType = 2;
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
break;
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
+ ArgForMatchingRetType = 2;
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
+ ArgForMatchingRetType = 2;
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
+ break;
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
ArgForMatchingRetType = 4;