aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/TargetLoweringBase.cpp
diff options
context:
space:
mode:
author: Fraser Cormack <fraser@codeplay.com> 2025-03-31 16:54:04 +0100
committer: GitHub <noreply@github.com> 2025-03-31 16:54:04 +0100
commit: 3fd0eaae52503ee2bbdffc75753acc4bcc72fe60 (patch)
tree: 5f7e46a46eb054ef072f661a7ed3f452a9f894e0 /llvm/lib/CodeGen/TargetLoweringBase.cpp
parent: ea06f7f96fb1ce5a77439cf1a26f97c2f2488648 (diff)
download: llvm-3fd0eaae52503ee2bbdffc75753acc4bcc72fe60.zip
          llvm-3fd0eaae52503ee2bbdffc75753acc4bcc72fe60.tar.gz
          llvm-3fd0eaae52503ee2bbdffc75753acc4bcc72fe60.tar.bz2
[libclc][amdgpu] Implement native_exp2 via AMD builtin (#133696)
This change came up during a discussion on #129679 and has been split out as a preparatory commit.

An example of the AMDGPU codegen is:

    define <2 x float> @_Z10native_expDv2_f(<2 x float> %val) {
      %mul = fmul afn <2 x float> %val, splat (float 0x3FF7154760000000)
      %0 = extractelement <2 x float> %mul, i64 0
      %1 = tail call float @llvm.amdgcn.exp2.f32(float %0)
      %vecinit.i = insertelement <2 x float> poison, float %1, i64 0
      %2 = extractelement <2 x float> %mul, i64 1
      %3 = tail call float @llvm.amdgcn.exp2.f32(float %2)
      %vecinit2.i = insertelement <2 x float> %vecinit.i, float %3, i64 1
      ret <2 x float> %vecinit2.i
    }

    define <2 x float> @_Z11native_exp2Dv2_f(<2 x float> %x) {
      %0 = extractelement <2 x float> %x, i64 0
      %1 = tail call float @llvm.amdgcn.exp2.f32(float %0)
      %vecinit = insertelement <2 x float> poison, float %1, i64 0
      %2 = extractelement <2 x float> %x, i64 1
      %3 = tail call float @llvm.amdgcn.exp2.f32(float %2)
      %vecinit2 = insertelement <2 x float> %vecinit, float %3, i64 1
      ret <2 x float> %vecinit2
    }
Diffstat (limited to 'llvm/lib/CodeGen/TargetLoweringBase.cpp')
0 files changed, 0 insertions, 0 deletions