From b025864af8897bcc5791d93f5004cc1acbb397de Mon Sep 17 00:00:00 2001 From: Jessica Del <50999226+OutOfCache@users.noreply.github.com> Date: Mon, 13 Nov 2023 13:23:26 +0100 Subject: [AMDGPU] - Add clang builtins for tied WMMA intrinsics (#70669) Add clang builtins for the new tied wmma intrinsics. These variations tie the destination accumulator matrix to the input accumulator matrix. See https://github.com/llvm/llvm-project/pull/69903 for context. --- clang/lib/CodeGen/CGBuiltin.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'clang/lib/CodeGen') diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a96bdf8..24fcf23 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18024,9 +18024,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, } case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32: case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64: + case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32: @@ -18064,6 +18068,16 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, ArgForMatchingRetType = 2; BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16; break; + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64: + ArgForMatchingRetType = 2; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64: + ArgForMatchingRetType = 2; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied; + break; case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64: ArgForMatchingRetType = 4; -- cgit v1.1