diff options
Diffstat (limited to 'offload/plugins-nextgen/cuda/src')
| -rw-r--r-- | offload/plugins-nextgen/cuda/src/rtl.cpp | 18 |
1 files changed, 9 insertions, 9 deletions
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index a9adcc3..a27c6f3 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -379,6 +379,12 @@ struct CUDADeviceTy : public GenericDeviceTy { return Err; HardwareParallelism = NumMuliprocessors * (MaxThreadsPerSM / WarpSize); + uint32_t MaxSharedMem; + if (auto Err = getDeviceAttr( + CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, MaxSharedMem)) + return Err; + MaxBlockSharedMemSize = MaxSharedMem; + return Plugin::success(); } @@ -1089,10 +1095,8 @@ struct CUDADeviceTy : public GenericDeviceTy { if (Res == CUDA_SUCCESS) Info.add("Total Constant Memory", TmpInt, "bytes"); - Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, - TmpInt); - if (Res == CUDA_SUCCESS) - Info.add("Max Shared Memory per Block", TmpInt, "bytes"); + Info.add("Max Shared Memory per Block", MaxBlockSharedMemSize, "bytes", + DeviceInfo::WORK_GROUP_LOCAL_MEM_SIZE); Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, TmpInt); if (Res == CUDA_SUCCESS) @@ -1235,11 +1239,6 @@ struct CUDADeviceTy : public GenericDeviceTy { return Info; } - virtual bool shouldSetupDeviceMemoryPool() const override { - /// We use the CUDA malloc for now. - return false; - } - /// Getters and setters for stack and heap sizes. Error getDeviceStackSize(uint64_t &Value) override { return getCtxLimit(CU_LIMIT_STACK_SIZE, Value); @@ -1247,6 +1246,7 @@ struct CUDADeviceTy : public GenericDeviceTy { Error setDeviceStackSize(uint64_t Value) override { return setCtxLimit(CU_LIMIT_STACK_SIZE, Value); } + bool hasDeviceHeapSize() override { return true; } Error getDeviceHeapSize(uint64_t &Value) override { return getCtxLimit(CU_LIMIT_MALLOC_HEAP_SIZE, Value); } |
