diff options
author | Fraser Cormack <fraser@codeplay.com> | 2025-03-31 16:54:04 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-03-31 16:54:04 +0100 |
commit | 3fd0eaae52503ee2bbdffc75753acc4bcc72fe60 (patch) | |
tree | 5f7e46a46eb054ef072f661a7ed3f452a9f894e0 /llvm/lib/CodeGen/TargetLoweringBase.cpp | |
parent | ea06f7f96fb1ce5a77439cf1a26f97c2f2488648 (diff) | |
download | llvm-3fd0eaae52503ee2bbdffc75753acc4bcc72fe60.zip llvm-3fd0eaae52503ee2bbdffc75753acc4bcc72fe60.tar.gz llvm-3fd0eaae52503ee2bbdffc75753acc4bcc72fe60.tar.bz2 |
[libclc][amdgpu] Implement native_exp2 via AMD builtin (#133696)
This came up during a discussion on #129679 and has been split out as
a preparatory commit.
An example of the AMDGPU codegen is:
; native_exp on a <2 x float> argument (mangled name _Z10native_expDv2_f).
; The input is pre-scaled by a splat constant, then the scalar AMDGPU exp2
; intrinsic is called once per lane and the results are reassembled into a
; vector.
define <2 x float> @_Z10native_expDv2_f(<2 x float> %val) {
; 0x3FF7154760000000 is ~1.442695, i.e. log2(e) at float precision, so the
; function evaluates exp(val) as exp2(val * log2(e)). The multiply carries
; the `afn` fast-math flag.
%mul = fmul afn <2 x float> %val, splat (float 0x3FF7154760000000)
; Lane 0: extract the scaled element, call the scalar intrinsic, and start
; the result vector from poison.
%0 = extractelement <2 x float> %mul, i64 0
%1 = tail call float @llvm.amdgcn.exp2.f32(float %0)
%vecinit.i = insertelement <2 x float> poison, float %1, i64 0
; Lane 1: same sequence, inserted into the partially built vector.
%2 = extractelement <2 x float> %mul, i64 1
%3 = tail call float @llvm.amdgcn.exp2.f32(float %2)
%vecinit2.i = insertelement <2 x float> %vecinit.i, float %3, i64 1
ret <2 x float> %vecinit2.i
}
; native_exp2 on a <2 x float> argument (mangled name _Z11native_exp2Dv2_f).
; No pre-scaling is applied: each lane of %x is passed directly to the
; scalar AMDGPU exp2 intrinsic and the results are reassembled into a vector.
define <2 x float> @_Z11native_exp2Dv2_f(<2 x float> %x) {
; Lane 0: extract, call the scalar intrinsic, and start the result vector
; from poison.
%0 = extractelement <2 x float> %x, i64 0
%1 = tail call float @llvm.amdgcn.exp2.f32(float %0)
%vecinit = insertelement <2 x float> poison, float %1, i64 0
; Lane 1: same sequence, inserted into the partially built vector.
%2 = extractelement <2 x float> %x, i64 1
%3 = tail call float @llvm.amdgcn.exp2.f32(float %2)
%vecinit2 = insertelement <2 x float> %vecinit, float %3, i64 1
ret <2 x float> %vecinit2
}
Diffstat (limited to 'llvm/lib/CodeGen/TargetLoweringBase.cpp')
0 files changed, 0 insertions, 0 deletions