aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYe Luo <yeluo@anl.gov>2020-06-24 12:18:00 -0400
committerShilei Tian <tianshilei1992@gmail.com>2020-06-24 12:22:45 -0400
commit6e5f64c44f26465829e5aed38e900129ace6e64f (patch)
tree20166ff58e78d06eee5b67d2609c74170cf8847d
parent69d2fa9ed1c1aba6f473feb03cad257e69a0cf52 (diff)
downloadllvm-6e5f64c44f26465829e5aed38e900129ace6e64f.zip
llvm-6e5f64c44f26465829e5aed38e900129ace6e64f.tar.gz
llvm-6e5f64c44f26465829e5aed38e900129ace6e64f.tar.bz2
[OpenMP] Adopt std::set in HostDataToTargetMap
Summary: lookupMapping took significant time due to linear complexity searching. This is bad for offloading from multiple host threads because lookupMapping is protected by mutex. Use std::set for logarithmic complexity searching. Before my change. libomptarget inclusive time 16.7 sec, exclusive time 8.6 sec. After the change libomptarget inclusive time 7.3 sec, exclusive time 0.4 sec. Most of the overhead of libomptarget (exclusive time) is gone. Reviewers: jdoerfert, grokos Reviewed By: grokos Subscribers: tianshilei1992, yaxunl, guansong, sstefan1 Tags: #openmp Differential Revision: https://reviews.llvm.org/D82264
-rw-r--r--openmp/libomptarget/src/device.cpp100
-rw-r--r--openmp/libomptarget/src/device.h24
-rw-r--r--openmp/libomptarget/src/omptarget.cpp4
3 files changed, 75 insertions, 53 deletions
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index c526725..5753543 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -25,22 +25,20 @@ int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
DataMapMtx.lock();
// Check if entry exists
- for (auto &HT : HostDataToTargetMap) {
- if ((uintptr_t)HstPtrBegin == HT.HstPtrBegin) {
- // Mapping already exists
- bool isValid = HT.HstPtrBegin == (uintptr_t) HstPtrBegin &&
- HT.HstPtrEnd == (uintptr_t) HstPtrBegin + Size &&
- HT.TgtPtrBegin == (uintptr_t) TgtPtrBegin;
- DataMapMtx.unlock();
- if (isValid) {
- DP("Attempt to re-associate the same device ptr+offset with the same "
- "host ptr, nothing to do\n");
- return OFFLOAD_SUCCESS;
- } else {
- DP("Not allowed to re-associate a different device ptr+offset with the "
- "same host ptr\n");
- return OFFLOAD_FAIL;
- }
+ auto search = HostDataToTargetMap.find(HstPtrBeginTy{(uintptr_t)HstPtrBegin});
+ if (search != HostDataToTargetMap.end()) {
+ // Mapping already exists
+ bool isValid = search->HstPtrEnd == (uintptr_t)HstPtrBegin + Size &&
+ search->TgtPtrBegin == (uintptr_t)TgtPtrBegin;
+ DataMapMtx.unlock();
+ if (isValid) {
+ DP("Attempt to re-associate the same device ptr+offset with the same "
+ "host ptr, nothing to do\n");
+ return OFFLOAD_SUCCESS;
+ } else {
+ DP("Not allowed to re-associate a different device ptr+offset with the "
+ "same host ptr\n");
+ return OFFLOAD_FAIL;
}
}
@@ -55,7 +53,7 @@ int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(newEntry.HstPtrBase),
DPxPTR(newEntry.HstPtrBegin), DPxPTR(newEntry.HstPtrEnd),
DPxPTR(newEntry.TgtPtrBegin));
- HostDataToTargetMap.push_front(newEntry);
+ HostDataToTargetMap.insert(newEntry);
DataMapMtx.unlock();
@@ -65,21 +63,17 @@ int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
int DeviceTy::disassociatePtr(void *HstPtrBegin) {
DataMapMtx.lock();
- // Check if entry exists
- for (HostDataToTargetListTy::iterator ii = HostDataToTargetMap.begin();
- ii != HostDataToTargetMap.end(); ++ii) {
- if ((uintptr_t)HstPtrBegin == ii->HstPtrBegin) {
- // Mapping exists
- if (ii->isRefCountInf()) {
- DP("Association found, removing it\n");
- HostDataToTargetMap.erase(ii);
- DataMapMtx.unlock();
- return OFFLOAD_SUCCESS;
- } else {
- DP("Trying to disassociate a pointer which was not mapped via "
- "omp_target_associate_ptr\n");
- break;
- }
+ auto search = HostDataToTargetMap.find(HstPtrBeginTy{(uintptr_t)HstPtrBegin});
+ if (search != HostDataToTargetMap.end()) {
+ // Mapping exists
+ if (search->isRefCountInf()) {
+ DP("Association found, removing it\n");
+ HostDataToTargetMap.erase(search);
+ DataMapMtx.unlock();
+ return OFFLOAD_SUCCESS;
+ } else {
+ DP("Trying to disassociate a pointer which was not mapped via "
+ "omp_target_associate_ptr\n");
}
}
@@ -95,11 +89,14 @@ uint64_t DeviceTy::getMapEntryRefCnt(void *HstPtrBegin) {
uint64_t RefCnt = 0;
DataMapMtx.lock();
- for (auto &HT : HostDataToTargetMap) {
- if (hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd) {
- DP("DeviceTy::getMapEntry: requested entry found\n");
- RefCnt = HT.getRefCount();
- break;
+ if (!HostDataToTargetMap.empty()) {
+ auto upper = HostDataToTargetMap.upper_bound(hp);
+ if (upper != HostDataToTargetMap.begin()) {
+ upper--;
+ if (hp >= upper->HstPtrBegin && hp < upper->HstPtrEnd) {
+ DP("DeviceTy::getMapEntry: requested entry found\n");
+ RefCnt = upper->getRefCount();
+ }
}
}
DataMapMtx.unlock();
@@ -117,21 +114,31 @@ LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) {
DP("Looking up mapping(HstPtrBegin=" DPxMOD ", Size=%ld)...\n", DPxPTR(hp),
Size);
- for (lr.Entry = HostDataToTargetMap.begin();
- lr.Entry != HostDataToTargetMap.end(); ++lr.Entry) {
+
+ if (HostDataToTargetMap.empty())
+ return lr;
+
+ auto upper = HostDataToTargetMap.upper_bound(hp);
+ // check the left bin
+ if (upper != HostDataToTargetMap.begin()) {
+ lr.Entry = std::prev(upper);
auto &HT = *lr.Entry;
// Is it contained?
lr.Flags.IsContained = hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd &&
(hp+Size) <= HT.HstPtrEnd;
+ // Does it extend beyond the mapped region?
+ lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp + Size) > HT.HstPtrEnd;
+ }
+
+ // check the right bin
+ if (!(lr.Flags.IsContained || lr.Flags.ExtendsAfter) &&
+ upper != HostDataToTargetMap.end()) {
+ lr.Entry = upper;
+ auto &HT = *lr.Entry;
// Does it extend into an already mapped region?
lr.Flags.ExtendsBefore = hp < HT.HstPtrBegin && (hp+Size) > HT.HstPtrBegin;
// Does it extend beyond the mapped region?
lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp+Size) > HT.HstPtrEnd;
-
- if (lr.Flags.IsContained || lr.Flags.ExtendsBefore ||
- lr.Flags.ExtendsAfter) {
- break;
- }
}
if (lr.Flags.ExtendsBefore) {
@@ -203,8 +210,9 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
"HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
- HostDataToTargetMap.push_front(HostDataToTargetTy((uintptr_t)HstPtrBase,
- (uintptr_t)HstPtrBegin, (uintptr_t)HstPtrBegin + Size, tp));
+ HostDataToTargetMap.emplace(
+ HostDataToTargetTy((uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin,
+ (uintptr_t)HstPtrBegin + Size, tp));
rc = (void *)tp;
}
}
diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h
index 1b7f776..309785a 100644
--- a/openmp/libomptarget/src/device.h
+++ b/openmp/libomptarget/src/device.h
@@ -18,6 +18,7 @@
#include <list>
#include <map>
#include <mutex>
+#include <set>
#include <vector>
// Forward declarations.
@@ -35,7 +36,8 @@ struct HostDataToTargetTy {
uintptr_t TgtPtrBegin; // target info.
private:
- uint64_t RefCount;
+ /// use mutable to allow modification via std::set iterator which is const.
+ mutable uint64_t RefCount;
static const uint64_t INFRefCount = ~(uint64_t)0;
public:
@@ -48,14 +50,14 @@ public:
return RefCount;
}
- uint64_t resetRefCount() {
+ uint64_t resetRefCount() const {
if (RefCount != INFRefCount)
RefCount = 1;
return RefCount;
}
- uint64_t incRefCount() {
+ uint64_t incRefCount() const {
if (RefCount != INFRefCount) {
++RefCount;
assert(RefCount < INFRefCount && "refcount overflow");
@@ -64,7 +66,7 @@ public:
return RefCount;
}
- uint64_t decRefCount() {
+ uint64_t decRefCount() const {
if (RefCount != INFRefCount) {
assert(RefCount > 0 && "refcount underflow");
--RefCount;
@@ -78,7 +80,19 @@ public:
}
};
-typedef std::list<HostDataToTargetTy> HostDataToTargetListTy;
+typedef uintptr_t HstPtrBeginTy;
+inline bool operator<(const HostDataToTargetTy &lhs, const HstPtrBeginTy &rhs) {
+ return lhs.HstPtrBegin < rhs;
+}
+inline bool operator<(const HstPtrBeginTy &lhs, const HostDataToTargetTy &rhs) {
+ return lhs < rhs.HstPtrBegin;
+}
+inline bool operator<(const HostDataToTargetTy &lhs,
+ const HostDataToTargetTy &rhs) {
+ return lhs.HstPtrBegin < rhs.HstPtrBegin;
+}
+
+typedef std::set<HostDataToTargetTy, std::less<>> HostDataToTargetListTy;
struct LookupResult {
struct {
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index df7481a..cce9dbd 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -139,12 +139,12 @@ static int InitLibrary(DeviceTy& Device) {
DP("Add mapping from host " DPxMOD " to device " DPxMOD " with size %zu"
"\n", DPxPTR(CurrHostEntry->addr), DPxPTR(CurrDeviceEntry->addr),
CurrDeviceEntry->size);
- Device.HostDataToTargetMap.push_front(HostDataToTargetTy(
+ Device.HostDataToTargetMap.emplace(
(uintptr_t)CurrHostEntry->addr /*HstPtrBase*/,
(uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/,
(uintptr_t)CurrHostEntry->addr + CurrHostEntry->size /*HstPtrEnd*/,
(uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/,
- true /*IsRefCountINF*/));
+ true /*IsRefCountINF*/);
}
}
Device.DataMapMtx.unlock();