diff options
author | Jessica Del <50999226+OutOfCache@users.noreply.github.com> | 2023-11-13 13:23:26 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-13 13:23:26 +0100 |
commit | b025864af8897bcc5791d93f5004cc1acbb397de (patch) | |
tree | 452e71f556d58a18a2c40b904859d148ea1de9ac /clang/lib/CodeGen | |
parent | 93ae26331592f41bf2b1d10b048743d80c468385 (diff) | |
download | llvm-b025864af8897bcc5791d93f5004cc1acbb397de.zip llvm-b025864af8897bcc5791d93f5004cc1acbb397de.tar.gz llvm-b025864af8897bcc5791d93f5004cc1acbb397de.tar.bz2 |
[AMDGPU] - Add clang builtins for tied WMMA intrinsics (#70669)
Add clang builtins for the new tied wmma intrinsics.
These variations tie the destination
accumulator matrix to the input
accumulator matrix.
See https://github.com/llvm/llvm-project/pull/69903 for context.
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 14 |
1 files changed, 14 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a96bdf8..24fcf23 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18024,9 +18024,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, } case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32: case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64: + case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32: @@ -18064,6 +18068,16 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, ArgForMatchingRetType = 2; BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16; break; + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64: + ArgForMatchingRetType = 2; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64: + ArgForMatchingRetType = 2; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied; + break; case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64: ArgForMatchingRetType = 4; |