diff options
author | Yaxun Liu <Yaxun.Liu@amd.com> | 2018-06-13 17:31:51 +0000 |
---|---|---|
committer | Yaxun Liu <Yaxun.Liu@amd.com> | 2018-06-13 17:31:51 +0000 |
commit | fb17bf60dd14a664bd7ee382e7404f4e8559fc5c (patch) | |
tree | 5bd727694cb6e4e2ba34515bbce9ec2fe2b0d7c5 | |
parent | 4bf9b5612a716167520e9c66606756d17b0c47b6 (diff) | |
download | llvm-fb17bf60dd14a664bd7ee382e7404f4e8559fc5c.zip llvm-fb17bf60dd14a664bd7ee382e7404f4e8559fc5c.tar.gz llvm-fb17bf60dd14a664bd7ee382e7404f4e8559fc5c.tar.bz2 |
[AMDGPU] Change enqueue kernel handle type
Currently the handle type is a global pointer which holds 8 bytes.
We need a larger type which holds 16 bytes, therefore change it
to [2 x i64].
Differential Revision: https://reviews.llvm.org/D48094
llvm-svn: 334625
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll | 10 |
2 files changed, 7 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp index 4ff6c6e..7bd8533 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp @@ -36,6 +36,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" @@ -116,7 +117,7 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) { } LLVM_DEBUG(dbgs() << "found enqueued kernel: " << F.getName() << '\n'); auto RuntimeHandle = (F.getName() + ".runtime_handle").str(); - auto T = Type::getInt8Ty(C)->getPointerTo(AMDGPUAS::GLOBAL_ADDRESS); + auto T = ArrayType::get(Type::getInt64Ty(C), 2); auto *GV = new GlobalVariable( M, T, /*IsConstant=*/false, GlobalValue::ExternalLinkage, diff --git a/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll b/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll index 1e4b2ac..7358d9b 100644 --- a/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll @@ -1,9 +1,9 @@ ; RUN: opt -data-layout=A5 -amdgpu-lower-enqueued-block -S < %s | FileCheck %s -; CHECK: @__test_block_invoke_kernel.runtime_handle = addrspace(1) global i8 addrspace(1)* null -; CHECK: @__test_block_invoke_2_kernel.runtime_handle = addrspace(1) global i8 addrspace(1)* null -; CHECK: @__amdgpu_enqueued_kernel.runtime_handle = addrspace(1) global i8 addrspace(1)* null -; CHECK: @__amdgpu_enqueued_kernel.1.runtime_handle = addrspace(1) global i8 addrspace(1)* null +; CHECK: @__test_block_invoke_kernel.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer +; CHECK: @__test_block_invoke_2_kernel.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer +; CHECK: @__amdgpu_enqueued_kernel.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer +; CHECK: 
@__amdgpu_enqueued_kernel.1.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer %struct.ndrange_t = type { i32 } %opencl.queue_t = type opaque @@ -84,7 +84,7 @@ entry: ; CHECK-LABEL: define amdgpu_kernel void @inlined_caller ; CHECK-SAME: #[[AT_CALLER]] ; CHECK-NOT: @__test_block_invoke_kernel -; CHECK: load i64, i64 addrspace(1)* bitcast (i8 addrspace(1)* addrspace(1)* @__test_block_invoke_kernel.runtime_handle to i64 addrspace(1)*) +; CHECK: load i64, i64 addrspace(1)* getelementptr inbounds ([2 x i64], [2 x i64] addrspace(1)* @__test_block_invoke_kernel.runtime_handle, i32 0, i32 0) define amdgpu_kernel void @inlined_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { entry: |